From ac713c9094572b9b159bf993d855f8c75d18af29 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 11 Mar 2024 22:25:05 +0100 Subject: [PATCH 01/14] let code_indexer use an SearchOptions struct for PerformSearch --- modules/indexer/code/bleve/bleve.go | 26 +++++++++---------- .../code/elasticsearch/elasticsearch.go | 26 ++++++++----------- modules/indexer/code/indexer_test.go | 11 +++++++- modules/indexer/code/internal/indexer.go | 15 +++++++++-- modules/indexer/code/search.go | 8 +++--- routers/web/explore/code.go | 12 ++++++++- routers/web/repo/search.go | 13 ++++++++-- routers/web/user/code.go | 12 ++++++++- 8 files changed, 85 insertions(+), 38 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 107dd23598d1b..d7f735e957db9 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return err } if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { - return fmt.Errorf("Misformatted git cat-file output: %w", err) + return fmt.Errorf("misformatted git cat-file output: %w", err) } } @@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error { // Search searches for files in the specified repo. 
// Returns the matching file-paths -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var ( indexerQuery query.Query keywordQuery query.Query ) - if isFuzzy { - phraseQuery := bleve.NewMatchPhraseQuery(keyword) + if opts.IsKeywordFuzzy { + phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) phraseQuery.FieldVal = "Content" phraseQuery.Analyzer = repoIndexerAnalyzer keywordQuery = phraseQuery } else { - prefixQuery := bleve.NewPrefixQuery(keyword) + prefixQuery := bleve.NewPrefixQuery(opts.Keyword) prefixQuery.FieldVal = "Content" keywordQuery = prefixQuery } - if len(repoIDs) > 0 { - repoQueries := make([]query.Query, 0, len(repoIDs)) - for _, repoID := range repoIDs { + if len(opts.RepoIDs) > 0 { + repoQueries := make([]query.Query, 0, len(opts.RepoIDs)) + for _, repoID := range opts.RepoIDs { repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID")) } @@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword // Save for reuse without language filter facetQuery := indexerQuery - if len(language) > 0 { - languageQuery := bleve.NewMatchQuery(language) + if len(opts.Language) > 0 { + languageQuery := bleve.NewMatchQuery(opts.Language) languageQuery.FieldVal = "Language" languageQuery.Analyzer = analyzer_keyword.Name @@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword ) } - from := (page - 1) * pageSize + from, pageSize := opts.GetSkipTake() searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} searchRequest.IncludeLocations = 
true - if len(language) == 0 { + if len(opts.Language) == 0 { searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) } @@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10) - if len(language) > 0 { + if len(opts.Language) > 0 { // Use separate query to go get all language counts facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 065b0b20618e7..e4622fd66ef95 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan } // Search searches for codes and language stats by given conditions. 
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { searchType := esMultiMatchTypePhrasePrefix - if isFuzzy { + if opts.IsKeywordFuzzy { searchType = esMultiMatchTypeBestFields } - kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType) + kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType) query := elastic.NewBoolQuery() query = query.Must(kwQuery) - if len(repoIDs) > 0 { - repoStrs := make([]any, 0, len(repoIDs)) - for _, repoID := range repoIDs { + if len(opts.RepoIDs) > 0 { + repoStrs := make([]any, 0, len(opts.RepoIDs)) + for _, repoID := range opts.RepoIDs { repoStrs = append(repoStrs, repoID) } repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) @@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } var ( - start int - kw = "" + keyword + "" - aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc() + start, pageSize = opts.GetSkipTake() + kw = "" + opts.Keyword + "" + aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc() ) - if page > 0 { - start = (page - 1) * pageSize - } - - if len(language) == 0 { + if len(opts.Language) == 0 { searchResult, err := b.inner.Client.Search(). Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). @@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword return convertResult(searchResult, kw, pageSize) } - langQuery := elastic.NewMatchQuery("language", language) + langQuery := elastic.NewMatchQuery("language", opts.Language) countResult, err := b.inner.Client.Search(). 
Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 23dbd63410541..8975c5ce4083b 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -8,6 +8,7 @@ import ( "os" "testing" + "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/indexer/code/bleve" @@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { - total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true) + total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ + RepoIDs: kw.RepoIDs, + Keyword: kw.Keyword, + Paginator: &db.ListOptions{ + Page: 1, + PageSize: 10, + }, + IsKeywordFuzzy: true, + }) assert.NoError(t, err) assert.Len(t, kw.IDs, int(total)) assert.Len(t, langs, kw.Langs) diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c92419deb22f7..c259fcd26eb6f 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -16,7 +17,17 @@ type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error - Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) + Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) +} + +type SearchOptions struct { + RepoIDs []int64 + Keyword 
string + Language string + + IsKeywordFuzzy bool + + db.Paginator } // NewDummyIndexer returns a dummy indexer @@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { return fmt.Errorf("indexer is not ready") } -func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) { return 0, nil, nil, fmt.Errorf("indexer is not ready") } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 89a62a8d3e2dd..f0d5047ad3c2b 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -32,6 +32,8 @@ type ResultLine struct { type SearchResultLanguages = internal.SearchResultLanguages +type SearchOptions = internal.SearchOptions + func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { startIndex := selectionStartIndex numLinesBefore := 0 @@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res // PerformSearch perform a search on a repository // if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 -func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) { - if len(keyword) == 0 { +func PerformSearch(ctx context.Context, opts *internal.SearchOptions) (int, []*Result, []*internal.SearchResultLanguages, error) { + if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil } - total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy) + total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts) if err != nil { return 0, nil, nil, err } 
diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index a6bc71ac9cde0..75bd0f3d2462d 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -6,6 +6,7 @@ package explore import ( "net/http" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" @@ -77,7 +78,16 @@ func Code(ctx *context.Context) { ) if (len(repoIDs) > 0) || isAdmin { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: repoIDs, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 766dd5726aa8d..042e5d3c711ca 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -6,6 +6,7 @@ package repo import ( "net/http" + "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" @@ -42,8 +43,16 @@ func Search(ctx *context.Context) { page = 1 } - total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, - language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: []int64{ctx.Repo.Repository.ID}, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: 
setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/user/code.go b/routers/web/user/code.go index 8613d38b65a66..d2afdd89051ef 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -6,6 +6,7 @@ package user import ( "net/http" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" @@ -75,7 +76,16 @@ func CodeSearch(ctx *context.Context) { ) if len(repoIDs) > 0 { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: repoIDs, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) From c0e547edbdadeb4b4b7736680a757dcd4ac8f9bf Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 11 Mar 2024 22:35:33 +0100 Subject: [PATCH 02/14] document new repo indexer type "wikis" --- custom/conf/app.example.ini | 4 ++-- docs/content/administration/config-cheat-sheet.en-us.md | 2 +- modules/setting/indexer.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 17d6cd3a35e02..dd2d32fdf03c1 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1406,9 +1406,9 @@ LEVEL = Info ;; repo indexer by default disabled, since it uses a lot of disk space ;REPO_INDEXER_ENABLED = false ;; -;; repo indexer units, the items to index, could be `sources`, `forks`, `mirrors`, `templates` or any combination of them separated by a comma. 
+;; repo indexer units, the items to index, could be `sources`, `forks`, `mirrors`, `templates`, `wikis` or any combination of them separated by a comma. ;; If empty then it defaults to `sources` only, as if you'd like to disable fully please see REPO_INDEXER_ENABLED. -;REPO_INDEXER_REPO_TYPES = sources,forks,mirrors,templates +;REPO_INDEXER_REPO_TYPES = sources,forks,mirrors,templates,wikis ;; ;; Code search engine type, could be `bleve` or `elasticsearch`. ;REPO_INDEXER_TYPE = bleve diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index 43ec470ad0ecb..1f1a533c04406 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -473,7 +473,7 @@ relation to port exhaustion. - `ISSUE_INDEXER_PATH`: **indexers/issues.bleve**: Index file used for issue search; available when ISSUE_INDEXER_TYPE is bleve and elasticsearch. Relative paths will be made absolute against _`AppWorkPath`_. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). -- `REPO_INDEXER_REPO_TYPES`: **sources,forks,mirrors,templates**: Repo indexer units. The items to index could be `sources`, `forks`, `mirrors`, `templates` or any combination of them separated by a comma. If empty then it defaults to `sources` only, as if you'd like to disable fully please see `REPO_INDEXER_ENABLED`. +- `REPO_INDEXER_REPO_TYPES`: **sources,forks,mirrors,templates,wikis**: Repo indexer units. The items to index could be `sources`, `forks`, `mirrors`, `templates`, `wikis` or any combination of them separated by a comma. If empty then it defaults to `sources` only, as if you'd like to disable fully please see `REPO_INDEXER_ENABLED`. - `REPO_INDEXER_TYPE`: **bleve**: Code search engine type, could be `bleve` or `elasticsearch`. 
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. - `REPO_INDEXER_CONN_STR`: ****: Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch. i.e. http://elastic:password@localhost:9200 diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 15f61502427db..0cb975b25208d 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -76,7 +76,7 @@ func loadIndexerFrom(rootCfg ConfigProvider) { Indexer.IssueIndexerName = sec.Key("ISSUE_INDEXER_NAME").MustString(Indexer.IssueIndexerName) Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false) - Indexer.RepoIndexerRepoTypes = strings.Split(sec.Key("REPO_INDEXER_REPO_TYPES").MustString("sources,forks,mirrors,templates"), ",") + Indexer.RepoIndexerRepoTypes = strings.Split(sec.Key("REPO_INDEXER_REPO_TYPES").MustString("sources,forks,mirrors,templates,wikis"), ",") Indexer.RepoType = sec.Key("REPO_INDEXER_TYPE").MustString("bleve") Indexer.RepoPath = filepath.ToSlash(sec.Key("REPO_INDEXER_PATH").MustString(filepath.ToSlash(filepath.Join(AppDataPath, "indexers/repos.bleve")))) if !filepath.IsAbs(Indexer.RepoPath) { From e21ff97159e6d7d39acf17c9e131d6aa374c10f6 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Mon, 11 Mar 2024 23:25:29 +0100 Subject: [PATCH 03/14] Add wiki support to code indexer backends --- modules/indexer/code/bleve/bleve.go | 24 ++++++++---- .../code/elasticsearch/elasticsearch.go | 37 ++++++++++++++----- modules/indexer/code/internal/indexer.go | 3 ++ modules/indexer/code/internal/model.go | 2 + modules/indexer/code/internal/util.go | 30 +++++++++------ modules/indexer/code/internal/util_test.go | 27 ++++++++++++++ 6 files changed, 94 insertions(+), 29 deletions(-) create mode 100644 modules/indexer/code/internal/util_test.go diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index d7f735e957db9..44ec50a3c3d32 100644 --- a/modules/indexer/code/bleve/bleve.go 
+++ b/modules/indexer/code/bleve/bleve.go @@ -51,6 +51,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { // RepoIndexerData data stored in the repo indexer type RepoIndexerData struct { RepoID int64 + IsWiki bool CommitID string Content string Language string @@ -65,7 +66,7 @@ func (d *RepoIndexerData) Type() string { const ( repoIndexerAnalyzer = "repoIndexerAnalyzer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 6 + repoIndexerLatestVersion = 7 ) // generateBleveIndexMapping generates a bleve index mapping for the repo indexer @@ -75,6 +76,10 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { numericFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping) + boolFieldMapping := bleve.NewBooleanFieldMapping() + boolFieldMapping.IncludeInAll = false + docMapping.AddFieldMappingsAt("IsWiki", boolFieldMapping) + textFieldMapping := bleve.NewTextFieldMapping() textFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("Content", textFieldMapping) @@ -125,7 +130,7 @@ func NewIndexer(indexDir string) *Indexer { } func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, - update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, + update internal.FileUpdate, repo *repo_model.Repository, isWiki bool, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { @@ -147,7 +152,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro } if size > setting.Indexer.MaxIndexerFileSize { - return b.addDelete(update.Filename, repo, batch) + return b.addDelete(update.Filename, repo, isWiki, batch) } if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { @@ -170,9 +175,10 @@ func (b *Indexer) addUpdate(ctx context.Context, 
batchWriter git.WriteCloserErro if _, err = batchReader.Discard(1); err != nil { return err } - id := internal.FilenameIndexerID(repo.ID, update.Filename) + id := internal.FilenameIndexerID(repo.ID, isWiki, update.Filename) return batch.Index(id, &RepoIndexerData{ RepoID: repo.ID, + IsWiki: isWiki, CommitID: commitSha, Content: string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), Language: analyze.GetCodeLanguage(update.Filename, fileContents), @@ -180,13 +186,15 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro }) } -func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error { - id := internal.FilenameIndexerID(repo.ID, filename) +func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, isWiki bool, batch *inner_bleve.FlushingBatch) error { + id := internal.FilenameIndexerID(repo.ID, isWiki, filename) return batch.Delete(id) } // Index indexes the data func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { + isWiki := false // TODO + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { @@ -200,14 +208,14 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st defer cancel() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, isWiki, batch); err != nil { return err } } cancel() } for _, filename := range changes.RemovedFilenames { - if err := b.addDelete(filename, repo, batch); err != nil { + if err := b.addDelete(filename, repo, isWiki, batch); err != nil { return err } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index e4622fd66ef95..f1c870856a705 100644 --- 
a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -20,6 +20,7 @@ import ( inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -29,7 +30,7 @@ import ( ) const ( - esRepoIndexerLatestVersion = 1 + esRepoIndexerLatestVersion = 2 // multi-match-types, currently only 2 types are used // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types esMultiMatchTypeBestFields = "best_fields" @@ -62,6 +63,10 @@ const ( "type": "long", "index": true }, + "is_wiki": { + "type": "boolean", + "index": true + }, "content": { "type": "text", "term_vector": "with_positions_offsets", @@ -84,7 +89,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository, isWiki bool) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -104,7 +109,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro } if size > setting.Indexer.MaxIndexerFileSize { - return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil + return []elastic.BulkableRequest{b.addDelete(update.Filename, repo, isWiki)}, nil } if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { @@ -127,7 +132,7 @@ func (b *Indexer) 
addUpdate(ctx context.Context, batchWriter git.WriteCloserErro if _, err = batchReader.Discard(1); err != nil { return nil, err } - id := internal.FilenameIndexerID(repo.ID, update.Filename) + id := internal.FilenameIndexerID(repo.ID, isWiki, update.Filename) return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). @@ -135,6 +140,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro Id(id). Doc(map[string]any{ "repo_id": repo.ID, + "is_wiki": isWiki, "content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), "commit_id": sha, "language": analyze.GetCodeLanguage(update.Filename, fileContents), @@ -143,8 +149,8 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro }, nil } -func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { - id := internal.FilenameIndexerID(repo.ID, filename) +func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, isWiki bool) elastic.BulkableRequest { + id := internal.FilenameIndexerID(repo.ID, isWiki, filename) return elastic.NewBulkDeleteRequest(). Index(b.inner.VersionedIndexName()). Id(id) @@ -152,6 +158,8 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti // Index will save the index data func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { + isWiki := false // TODO + reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! 
@@ -164,7 +172,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st defer cancel() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo, isWiki) if err != nil { return err } @@ -176,7 +184,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st } for _, filename := range changes.RemovedFilenames { - reqs = append(reqs, b.addDelete(filename, repo)) + reqs = append(reqs, b.addDelete(filename, repo, isWiki)) } if len(reqs) > 0 { @@ -239,7 +247,11 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) panic(fmt.Sprintf("2===%#v", hit.Highlight)) } - repoID, fileName := internal.ParseIndexerID(hit.Id) + repoID, isWiki, fileName, err := internal.ParseIndexerID(hit.Id) + if err != nil { + return 0, nil, nil, err + } + res := make(map[string]any) if err := json.Unmarshal(hit.Source, &res); err != nil { return 0, nil, nil, err @@ -249,6 +261,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) hits = append(hits, &internal.SearchResult{ RepoID: repoID, + IsWiki: isWiki, Filename: fileName, CommitID: res["commit_id"].(string), Content: res["content"].(string), @@ -299,6 +312,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int query = query.Must(repoQuery) } + opts.IsWiki = optional.Some(false) // TODO + + if opts.IsWiki.Has() { + query = query.Must(elastic.NewTermQuery("is_wiki", opts.IsWiki.Value())) + } + var ( start, pageSize = opts.GetSkipTake() kw = "" + opts.Keyword + "" diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c259fcd26eb6f..12862c2ebf6aa 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -10,6 +10,7 @@ import ( "code.gitea.io/gitea/models/db" 
repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/optional" ) // Indexer defines an interface to index and search code contents @@ -27,6 +28,8 @@ type SearchOptions struct { IsKeywordFuzzy bool + IsWiki optional.Option[bool] + db.Paginator } diff --git a/modules/indexer/code/internal/model.go b/modules/indexer/code/internal/model.go index f75263c83cfe0..05de7e930f397 100644 --- a/modules/indexer/code/internal/model.go +++ b/modules/indexer/code/internal/model.go @@ -21,11 +21,13 @@ type RepoChanges struct { // IndexerData represents data stored in the code indexer type IndexerData struct { RepoID int64 + IsWiki bool } // SearchResult result of performing a search in a repo type SearchResult struct { RepoID int64 + IsWiki bool StartIndex int EndIndex int Filename string diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go index 689c4f4584b14..414b4a9857f89 100644 --- a/modules/indexer/code/internal/util.go +++ b/modules/indexer/code/internal/util.go @@ -4,29 +4,35 @@ package internal import ( + "fmt" "strings" "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" ) -func FilenameIndexerID(repoID int64, filename string) string { - return internal.Base36(repoID) + "_" + filename +func FilenameIndexerID(repoID int64, isWiki bool, filename string) string { + t := "r" + if isWiki { + t = "w" + } + return internal.Base36(repoID) + "_" + t + "_" + filename } -func ParseIndexerID(indexerID string) (int64, string) { - index := strings.IndexByte(indexerID, '_') - if index == -1 { - log.Error("Unexpected ID in repo indexer: %s", indexerID) +func ParseIndexerID(indexerID string) (int64, bool, string, error) { + parts := strings.SplitN(indexerID, "_", 3) + if len(parts) != 3 { + return 0, false, "", fmt.Errorf("unexpected ID in repo indexer: %s", indexerID) } - repoID, _ := internal.ParseBase36(indexerID[:index]) - return repoID, 
indexerID[index+1:] + repoID, _ := internal.ParseBase36(parts[0]) + isWiki := parts[1] == "w" + return repoID, isWiki, parts[2], nil } func FilenameOfIndexerID(indexerID string) string { - index := strings.IndexByte(indexerID, '_') - if index == -1 { - log.Error("Unexpected ID in repo indexer: %s", indexerID) + _, _, name, err := ParseIndexerID(indexerID) + if err != nil { + log.Error(err.Error()) } - return indexerID[index+1:] + return name } diff --git a/modules/indexer/code/internal/util_test.go b/modules/indexer/code/internal/util_test.go new file mode 100644 index 0000000000000..445397a479e2e --- /dev/null +++ b/modules/indexer/code/internal/util_test.go @@ -0,0 +1,27 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFilenameIndexerID(t *testing.T) { + assert.EqualValues(t, "9ix_r_test.txt", FilenameIndexerID(12345, false, "test.txt")) + assert.EqualValues(t, "9ix_w_test.txt", FilenameIndexerID(12345, true, "test.txt")) + assert.EqualValues(t, "n_r_you don't know how to name a file?", FilenameIndexerID(23, false, "you don't know how to name a file?")) +} + +func TestParseIndexerID(t *testing.T) { + repoID, isWiki, filename, err := ParseIndexerID("9ix_r_test.txt") + assert.NoError(t, err) + assert.EqualValues(t, 12345, repoID) + assert.False(t, isWiki) + assert.EqualValues(t, "test.txt", filename) + + _, _, _, err = ParseIndexerID("9ix_r") + assert.Error(t, err) +} From 01c4353d306fbf532fa6688399c20f5c00d9e892 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 00:10:27 +0100 Subject: [PATCH 04/14] Prepare CodeIndexer to index wikis --- models/repo/repo_indexer.go | 2 + modules/indexer/code/bleve/bleve.go | 20 ++- .../code/elasticsearch/elasticsearch.go | 20 ++- modules/indexer/code/git.go | 64 ++++++--- modules/indexer/code/indexer.go | 123 +++++++++++------- modules/indexer/code/indexer_test.go | 2 +- 
modules/indexer/code/internal/indexer.go | 4 +- 7 files changed, 156 insertions(+), 79 deletions(-) diff --git a/models/repo/repo_indexer.go b/models/repo/repo_indexer.go index 6e19d8f937f18..4875354823273 100644 --- a/models/repo/repo_indexer.go +++ b/models/repo/repo_indexer.go @@ -20,6 +20,8 @@ const ( RepoIndexerTypeCode RepoIndexerType = iota // 0 // RepoIndexerTypeStats repository stats indexer RepoIndexerTypeStats // 1 + // RepoIndexerTypeWiki wiki indexer + RepoIndexerTypeWiki // 2 ) // RepoIndexerStatus status of a repo's entry in the repo indexer diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 44ec50a3c3d32..1b9cafb4266b4 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -139,10 +139,15 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro size := update.Size + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } + var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repoPath}) if err != nil { return err } @@ -192,19 +197,22 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, isWiki } // Index indexes the data -func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { - isWiki := false // TODO +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, isWiki bool, sha string, changes *internal.RepoChanges) error { + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately 
failing if not run in a valid git directory we need to run git rev-parse first! - if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil { - log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err) + if err := git.EnsureValidGitRepository(ctx, repoPath); err != nil { + log.Error("Unable to open git repo: %s for %-v: %v", repoPath, repo, err) return err } - batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath()) + batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repoPath) defer cancel() for _, update := range changes.Updates { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index f1c870856a705..adedd2ce7ad1a 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -96,10 +96,15 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro } size := update.Size + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } + var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repoPath}) if err != nil { return nil, err } @@ -157,18 +162,21 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, isWiki } // Index will save the index data -func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { - isWiki := false // TODO +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, isWiki bool, sha string, changes *internal.RepoChanges) error { + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } reqs := make([]elastic.BulkableRequest, 0) if 
len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil { - log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err) + if err := git.EnsureValidGitRepository(ctx, repoPath); err != nil { + log.Error("Unable to open git repo: %s for %-v: %v", repoPath, repo, err) return err } - batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath()) + batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repoPath) defer cancel() for _, update := range changes.Updates { diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index f105d032eba12..92e28e5782c00 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -15,8 +15,13 @@ import ( "code.gitea.io/gitea/modules/setting" ) -func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { - stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) +func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository, isWiki bool) (string, error) { + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } + + stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repoPath}) if err != nil { return "", err } @@ -24,23 +29,30 @@ func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (stri } // getRepoChanges returns changes to repo since last indexer update -func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { - status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) +func getRepoChanges(ctx 
context.Context, repo *repo_model.Repository, isWiki bool, revision string) (*internal.RepoChanges, error) { + repoPath := repo.RepoPath() + indexerType := repo_model.RepoIndexerTypeCode + if isWiki { + repoPath = repo.WikiPath() + indexerType = repo_model.RepoIndexerTypeWiki + } + + status, err := repo_model.GetIndexerStatus(ctx, repo, indexerType) if err != nil { return nil, err } needGenesis := len(status.CommitSha) == 0 if !needGenesis { - hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) - stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) + stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repoPath}) needGenesis = len(stdout) == 0 } if needGenesis { - return genesisChanges(ctx, repo, revision) + return genesisChanges(ctx, repo, isWiki, revision) } - return nonGenesisChanges(ctx, repo, revision) + return nonGenesisChanges(ctx, repo, isWiki, status, revision) } func isIndexable(entry *git.TreeEntry) bool { @@ -84,14 +96,23 @@ func parseGitLsTreeOutput(objectFormat git.ObjectFormat, stdout []byte) ([]inter } // genesisChanges get changes to add repo to the indexer for the first time -func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { +func genesisChanges(ctx context.Context, repo *repo_model.Repository, isWiki bool, revision string) (*internal.RepoChanges, error) { var changes internal.RepoChanges - stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } + + stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repoPath}) if runErr 
!= nil { return nil, runErr } - objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) + repoObjectFormatName := "sha1" + if !isWiki { + repoObjectFormatName = repo.ObjectFormatName + } + objectFormat := git.ObjectFormatFromName(repoObjectFormatName) var err error changes.Updates, err = parseGitLsTreeOutput(objectFormat, stdout) @@ -99,9 +120,14 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s } // nonGenesisChanges get changes since the previous indexer update -func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { - diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) - stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) +func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, isWiki bool, indexerStatus *repo_model.RepoIndexerStatus, revision string) (*internal.RepoChanges, error) { + repoPath := repo.RepoPath() + if isWiki { + repoPath = repo.WikiPath() + } + + diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(indexerStatus.CommitSha, revision) + stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repoPath}) if runErr != nil { // previous commit sha may have been removed by a force push, so // try rebuilding from scratch @@ -109,7 +135,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { return nil, err } - return genesisChanges(ctx, repo, revision) + return genesisChanges(ctx, repo, isWiki, revision) } var changes internal.RepoChanges @@ -167,12 +193,16 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). AddDashesAndList(updatedFilenames...) 
- lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repoPath}) if err != nil { return nil, err } - objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) + repoObjectFormatName := "sha1" + if !isWiki { + repoObjectFormatName = repo.ObjectFormatName + } + objectFormat := git.ObjectFormatFromName(repoObjectFormatName) changes.Updates, err = parseGitLsTreeOutput(objectFormat, lsTreeStdout) return &changes, err diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index ebebf6ba8a28d..20d48e85955df 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -38,7 +38,7 @@ func init() { globalIndexer.Store(dummyIndexer) } -func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { +func index(ctx context.Context, indexer internal.Indexer, repoID int64, isWiki bool) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) if repo_model.IsErrRepoNotExist(err) { return indexer.Delete(ctx, repoID) @@ -53,42 +53,64 @@ func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { repoTypes = []string{"sources"} } - // skip forks from being indexed if unit is not present - if !slices.Contains(repoTypes, "forks") && repo.IsFork { - return nil - } + if isWiki { + // ignore empty wikis + if !repo.HasWiki() { + return nil + } - // skip mirrors from being indexed if unit is not present - if !slices.Contains(repoTypes, "mirrors") && repo.IsMirror { - return nil - } + // skip wikis from being indexed if unit is not present + if !slices.Contains(repoTypes, "wikis") { + return nil + } + } else { + // ignore empty repos + if repo.IsEmpty { + return nil + } - // skip templates from being indexed if unit is not present - if !slices.Contains(repoTypes, "templates") && repo.IsTemplate { - return nil - } + // skip forks from being indexed if unit is not present + if !slices.Contains(repoTypes, "forks") 
&& repo.IsFork { + return nil + } - // skip regular repos from being indexed if unit is not present - if !slices.Contains(repoTypes, "sources") && !repo.IsFork && !repo.IsMirror && !repo.IsTemplate { - return nil + // skip mirrors from being indexed if unit is not present + if !slices.Contains(repoTypes, "mirrors") && repo.IsMirror { + return nil + } + + // skip templates from being indexed if unit is not present + if !slices.Contains(repoTypes, "templates") && repo.IsTemplate { + return nil + } + + // skip regular repos from being indexed if unit is not present + if !slices.Contains(repoTypes, "sources") && !repo.IsFork && !repo.IsMirror && !repo.IsTemplate { + return nil + } } - sha, err := getDefaultBranchSha(ctx, repo) + sha, err := getDefaultBranchSha(ctx, repo, isWiki) if err != nil { return err } - changes, err := getRepoChanges(ctx, repo, sha) + changes, err := getRepoChanges(ctx, repo, isWiki, sha) if err != nil { return err } else if changes == nil { return nil } - if err := indexer.Index(ctx, repo, sha, changes); err != nil { + if err := indexer.Index(ctx, repo, isWiki, sha, changes); err != nil { return err } - return repo_model.UpdateIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode, sha) + indexerType := repo_model.RepoIndexerTypeCode + if isWiki { + indexerType = repo_model.RepoIndexerTypeWiki + } + + return repo_model.UpdateIndexerStatus(ctx, repo, indexerType, sha) } // Init initialize the repo indexer @@ -121,11 +143,11 @@ func Init() { handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { indexer := *globalIndexer.Load() for _, indexerData := range items { - log.Trace("IndexerData Process Repo: %d", indexerData.RepoID) - if err := index(ctx, indexer, indexerData.RepoID); err != nil { + log.Trace("IndexerData Process Repo: %d (IsWiki=%v)", indexerData.RepoID, indexerData.IsWiki) + if err := index(ctx, indexer, indexerData.RepoID, indexerData.IsWiki); err != nil { unhandled = append(unhandled, indexerData) if 
!setting.IsInTesting { - log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err) + log.Error("Codes indexer handler: index error for repo %d (wiki=%v): %v", indexerData.RepoID, indexerData.IsWiki, err) } } } @@ -273,40 +295,47 @@ func populateRepoIndexer(ctx context.Context) { log.Fatal("System error: %v", err) } - var maxRepoID int64 - if maxRepoID, err = db.GetMaxID("repository"); err != nil { - log.Fatal("System error: %v", err) - } - - // start with the maximum existing repo ID and work backwards, so that we - // don't include repos that are created after gitea starts; such repos will - // already be added to the indexer, and we don't need to add them again. - for maxRepoID > 0 { - select { - case <-ctx.Done(): - log.Info("Repository Indexer population shutdown before completion") - return - default: + for _, isWiki := range []bool{false, true} { + indexerType := repo_model.RepoIndexerTypeCode + if isWiki { + indexerType = repo_model.RepoIndexerTypeWiki } - ids, err := repo_model.GetUnindexedRepos(ctx, repo_model.RepoIndexerTypeCode, maxRepoID, 0, 50) - if err != nil { - log.Error("populateRepoIndexer: %v", err) - return - } else if len(ids) == 0 { - break + + var maxRepoID int64 + if maxRepoID, err = db.GetMaxID("repository"); err != nil { + log.Fatal("System error: %v", err) } - for _, id := range ids { + + // start with the maximum existing repo ID and work backwards, so that we + // don't include repos that are created after gitea starts; such repos will + // already be added to the indexer, and we don't need to add them again. 
+ for maxRepoID > 0 { select { case <-ctx.Done(): log.Info("Repository Indexer population shutdown before completion") return default: } - if err := indexerQueue.Push(&internal.IndexerData{RepoID: id}); err != nil { - log.Error("indexerQueue.Push: %v", err) + ids, err := repo_model.GetUnindexedRepos(ctx, indexerType, maxRepoID, 0, 50) + if err != nil { + log.Error("populateRepoIndexer: %v", err) return + } else if len(ids) == 0 { + break + } + for _, id := range ids { + select { + case <-ctx.Done(): + log.Info("Repository Indexer population shutdown before completion") + return + default: + } + if err := indexerQueue.Push(&internal.IndexerData{RepoID: id, IsWiki: isWiki}); err != nil { + log.Error("indexerQueue.Push: %v", err) + return + } + maxRepoID = id - 1 } - maxRepoID = id - 1 } } log.Info("Done (re)populating the repo indexer with existing repositories") diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 8975c5ce4083b..861d2c575cf47 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -29,7 +29,7 @@ func TestMain(m *testing.M) { func testIndexer(name string, t *testing.T, indexer internal.Indexer) { t.Run(name, func(t *testing.T) { var repoID int64 = 1 - err := index(git.DefaultContext, indexer, repoID) + err := index(git.DefaultContext, indexer, repoID, false) assert.NoError(t, err) keywords := []struct { RepoIDs []int64 diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index 12862c2ebf6aa..eae217aa4a45f 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -16,7 +16,7 @@ import ( // Indexer defines an interface to index and search code contents type Indexer interface { internal.Indexer - Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error + Index(ctx context.Context, repo *repo_model.Repository, isWiki bool, sha string, changes 
*RepoChanges) error Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) } @@ -44,7 +44,7 @@ type dummyIndexer struct { internal.Indexer } -func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { +func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, isWiki bool, sha string, changes *RepoChanges) error { return fmt.Errorf("indexer is not ready") } From 48873be2afc4fc0c665811a1b4688df43eea5056 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 00:13:09 +0100 Subject: [PATCH 05/14] move testing-todo --- modules/indexer/code/elasticsearch/elasticsearch.go | 3 --- modules/indexer/code/search.go | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index adedd2ce7ad1a..2446be95360df 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -20,7 +20,6 @@ import ( inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -320,8 +319,6 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int query = query.Must(repoQuery) } - opts.IsWiki = optional.Some(false) // TODO - if opts.IsWiki.Has() { query = query.Must(elastic.NewTermQuery("is_wiki", opts.IsWiki.Value())) } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index f0d5047ad3c2b..e0e237b73cc05 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -11,6 +11,7 @@ import ( 
"code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/indexer/code/internal" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/timeutil" ) @@ -132,6 +133,8 @@ func PerformSearch(ctx context.Context, opts *internal.SearchOptions) (int, []*R return 0, nil, nil, nil } + opts.IsWiki = optional.Some(false) // TODO: for testing + total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts) if err != nil { return 0, nil, nil, err From 64d08566160e95c4ec652cb2d8936bc29670d802 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 00:14:24 +0100 Subject: [PATCH 06/14] nit --- modules/indexer/code/search.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index e0e237b73cc05..487e95d9e1c84 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -127,7 +127,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 +// if isFuzzy is false the Damerau-Levenshtein distance is 0 func PerformSearch(ctx context.Context, opts *internal.SearchOptions) (int, []*Result, []*internal.SearchResultLanguages, error) { if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil From 934069bdf5d1033e3a2a7473b56d92b2e64f87fb Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 00:30:53 +0100 Subject: [PATCH 07/14] fix branch-ref for wikis --- modules/indexer/code/git.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 92e28e5782c00..3ba02e46b39ce 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -17,11 +17,13 @@ import ( func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository, isWiki bool) (string, error) { repoPath := 
repo.RepoPath() + defaultBranch := repo.DefaultBranch if isWiki { repoPath = repo.WikiPath() + defaultBranch = repo.DefaultWikiBranch } - stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repoPath}) + stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + defaultBranch).RunStdString(&git.RunOpts{Dir: repoPath}) if err != nil { return "", err } From d94bf7ef588944109fe30aabcf9edef08698a69a Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 00:49:09 +0100 Subject: [PATCH 08/14] expose search option via url query --- modules/indexer/code/search.go | 3 --- routers/web/explore/code.go | 4 ++++ routers/web/repo/search.go | 4 ++++ routers/web/user/code.go | 4 ++++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 487e95d9e1c84..099bd38e15794 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -11,7 +11,6 @@ import ( "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/indexer/code/internal" - "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/timeutil" ) @@ -133,8 +132,6 @@ func PerformSearch(ctx context.Context, opts *internal.SearchOptions) (int, []*R return 0, nil, nil, nil } - opts.IsWiki = optional.Some(false) // TODO: for testing - total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts) if err != nil { return 0, nil, nil, err diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index 75bd0f3d2462d..4fe2bb531e84d 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -37,11 +37,13 @@ func Code(ctx *context.Context) { queryType := ctx.FormTrim("t") isFuzzy := queryType != "match" + wikis := ctx.FormOptionalBool("wikis") ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language ctx.Data["queryType"] = 
queryType ctx.Data["PageIsViewCode"] = true + ctx.Data["Wikis"] = wikis if keyword == "" { ctx.HTML(http.StatusOK, tplExploreCode) @@ -82,6 +84,7 @@ func Code(ctx *context.Context) { RepoIDs: repoIDs, Keyword: keyword, IsKeywordFuzzy: isFuzzy, + IsWiki: wikis, Language: language, Paginator: &db.ListOptions{ Page: page, @@ -139,6 +142,7 @@ func Code(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") + pager.AddParam(ctx, "wikis", "Wikis") ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplExploreCode) diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 042e5d3c711ca..4209a55b9782f 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -27,11 +27,13 @@ func Search(ctx *context.Context) { queryType := ctx.FormTrim("t") isFuzzy := queryType != "match" + wikis := ctx.FormOptionalBool("wikis") ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language ctx.Data["queryType"] = queryType ctx.Data["PageIsViewCode"] = true + ctx.Data["Wikis"] = wikis if keyword == "" { ctx.HTML(http.StatusOK, tplSearch) @@ -47,6 +49,7 @@ func Search(ctx *context.Context) { RepoIDs: []int64{ctx.Repo.Repository.ID}, Keyword: keyword, IsKeywordFuzzy: isFuzzy, + IsWiki: wikis, Language: language, Paginator: &db.ListOptions{ Page: page, @@ -70,6 +73,7 @@ func Search(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") + pager.AddParam(ctx, "wikis", "Wikis") ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplSearch) diff --git a/routers/web/user/code.go b/routers/web/user/code.go index d2afdd89051ef..9d19ee521278e 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -42,11 +42,13 @@ func CodeSearch(ctx *context.Context) { queryType := ctx.FormTrim("t") isFuzzy := queryType != "match" + wikis := 
ctx.FormOptionalBool("wikis") ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language ctx.Data["queryType"] = queryType ctx.Data["IsCodePage"] = true + ctx.Data["Wikis"] = wikis if keyword == "" { ctx.HTML(http.StatusOK, tplUserCode) @@ -80,6 +82,7 @@ func CodeSearch(ctx *context.Context) { RepoIDs: repoIDs, Keyword: keyword, IsKeywordFuzzy: isFuzzy, + IsWiki: wikis, Language: language, Paginator: &db.ListOptions{ Page: page, @@ -124,6 +127,7 @@ func CodeSearch(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") + pager.AddParam(ctx, "wikis", "Wikis") ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplUserCode) From 8a11c63ab999e483ec243e844f30648a8b615870 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 01:03:46 +0100 Subject: [PATCH 09/14] let bleve also understand --- modules/indexer/code/bleve/bleve.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 1b9cafb4266b4..8fa69cd2da429 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -280,6 +280,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int indexerQuery = keywordQuery } + if opts.IsWiki.Has() { + wikiQuery := bleve.NewBoolFieldQuery(opts.IsWiki.Value()) + wikiQuery.FieldVal = "IsWiki" + indexerQuery = bleve.NewConjunctionQuery(indexerQuery, wikiQuery) + } + // Save for reuse without language filter facetQuery := indexerQuery if len(opts.Language) > 0 { @@ -295,7 +301,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int from, pageSize := opts.GetSkipTake() searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) - searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} + searchRequest.Fields = 
[]string{"Content", "RepoID", "IsWiki", "Language", "CommitID", "UpdatedAt"} searchRequest.IncludeLocations = true if len(opts.Language) == 0 { From c3e4c7ba7fb2835f763c9b348457efa1501d7309 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 01:29:29 +0100 Subject: [PATCH 10/14] pager.AddParam do not understand generic optional --- routers/web/explore/code.go | 5 ++++- routers/web/repo/search.go | 5 ++++- routers/web/user/code.go | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index 4fe2bb531e84d..06d536abb1193 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -4,6 +4,7 @@ package explore import ( + "fmt" "net/http" "code.gitea.io/gitea/models/db" @@ -142,7 +143,9 @@ func Code(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") - pager.AddParam(ctx, "wikis", "Wikis") + if wikis.Has() { + pager.AddParamString("wikis", fmt.Sprintf("%v", wikis.Value())) + } ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplExploreCode) diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 4209a55b9782f..22ea81f100f6c 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -4,6 +4,7 @@ package repo import ( + "fmt" "net/http" "code.gitea.io/gitea/models/db" @@ -73,7 +74,9 @@ func Search(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") - pager.AddParam(ctx, "wikis", "Wikis") + if wikis.Has() { + pager.AddParamString("wikis", fmt.Sprintf("%v", wikis.Value())) + } ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplSearch) diff --git a/routers/web/user/code.go b/routers/web/user/code.go index 9d19ee521278e..0b18b40e368ee 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ 
-4,6 +4,7 @@ package user import ( + "fmt" "net/http" "code.gitea.io/gitea/models/db" @@ -127,7 +128,9 @@ func CodeSearch(ctx *context.Context) { pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager.SetDefaultParams(ctx) pager.AddParam(ctx, "l", "Language") - pager.AddParam(ctx, "wikis", "Wikis") + if wikis.Has() { + pager.AddParamString("wikis", fmt.Sprintf("%v", wikis.Value())) + } ctx.Data["Page"] = pager ctx.HTML(http.StatusOK, tplUserCode) From 4fc7e12d18e4a1d97b6216e6629eba0900548a12 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 02:26:03 +0100 Subject: [PATCH 11/14] indexer.Delete don't delete wiki on repo update and vice versa + update wiki index on wiki change --- modules/indexer/code/bleve/bleve.go | 11 +++++-- .../code/elasticsearch/elasticsearch.go | 10 ++++-- modules/indexer/code/git.go | 16 +++++++--- modules/indexer/code/indexer.go | 17 +++++++--- modules/indexer/code/indexer_test.go | 3 +- modules/indexer/code/internal/indexer.go | 4 +-- routers/web/repo/setting/setting.go | 8 ++++- services/indexer/notify.go | 10 +++--- services/wiki/wiki.go | 31 +++++++++++++++++-- tests/integration/repo_search_test.go | 4 +-- 10 files changed, 89 insertions(+), 25 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 8fa69cd2da429..54d0ab234d95b 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -20,6 +20,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -231,8 +232,14 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, isWiki } // Delete deletes indexes by ids -func (b *Indexer) Delete(_ context.Context, repoID
int64) error { - query := inner_bleve.NumericEqualityQuery(repoID, "RepoID") +func (b *Indexer) Delete(_ context.Context, repoID int64, isWiki optional.Option[bool]) error { + var query query.Query + query = inner_bleve.NumericEqualityQuery(repoID, "RepoID") + if isWiki.Has() { + wikiQuery := bleve.NewBoolFieldQuery(isWiki.Value()) + wikiQuery.FieldVal = "IsWiki" + query = bleve.NewConjunctionQuery(query, wikiQuery) + } searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false) result, err := b.inner.Indexer.Search(searchRequest) if err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 2446be95360df..2d94ddbb0a296 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -20,6 +20,7 @@ import ( inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" @@ -211,9 +212,14 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, isWiki } // Delete deletes indexes by ids -func (b *Indexer) Delete(ctx context.Context, repoID int64) error { +func (b *Indexer) Delete(ctx context.Context, repoID int64, isWiki optional.Option[bool]) error { + query := elastic.NewBoolQuery() + query = query.Must(elastic.NewTermsQuery("repo_id", repoID)) + if isWiki.Has() { + query = query.Must(elastic.NewTermQuery("is_wiki", isWiki.Value())) + } _, err := b.inner.Client.DeleteByQuery(b.inner.VersionedIndexName()). - Query(elastic.NewTermsQuery("repo_id", repoID)). + Query(query). 
Do(ctx) return err } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 3ba02e46b39ce..0ac290737a91c 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" ) @@ -30,16 +31,23 @@ func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository, isWik return strings.TrimSpace(stdout), nil } +func getRepoStatus(ctx context.Context, repo *repo_model.Repository, isWiki bool) (*repo_model.RepoIndexerStatus, error) { + indexerType := repo_model.RepoIndexerTypeCode + if isWiki { + indexerType = repo_model.RepoIndexerTypeWiki + } + + return repo_model.GetIndexerStatus(ctx, repo, indexerType) +} + // getRepoChanges returns changes to repo since last indexer update func getRepoChanges(ctx context.Context, repo *repo_model.Repository, isWiki bool, revision string) (*internal.RepoChanges, error) { repoPath := repo.RepoPath() - indexerType := repo_model.RepoIndexerTypeCode if isWiki { repoPath = repo.WikiPath() - indexerType = repo_model.RepoIndexerTypeWiki } - status, err := repo_model.GetIndexerStatus(ctx, repo, indexerType) + status, err := getRepoStatus(ctx, repo, isWiki) if err != nil { return nil, err } @@ -134,7 +142,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, isWiki // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { + if err := (*globalIndexer.Load()).Delete(ctx, repo.ID, optional.Some(isWiki)); err != nil { return nil, err } return genesisChanges(ctx, repo, isWiki, revision) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 20d48e85955df..1811246a27e41 100644 --- 
a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" "code.gitea.io/gitea/modules/setting" @@ -41,7 +42,7 @@ func init() { func index(ctx context.Context, indexer internal.Indexer, repoID int64, isWiki bool) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) if repo_model.IsErrRepoNotExist(err) { - return indexer.Delete(ctx, repoID) + return indexer.Delete(ctx, repoID, optional.None[bool]()) } if err != nil { return err @@ -54,8 +55,16 @@ func index(ctx context.Context, indexer internal.Indexer, repoID int64, isWiki b } if isWiki { - // ignore empty wikis if !repo.HasWiki() { + // wiki go deleted, so we delete index too + status, err := getRepoStatus(ctx, repo, isWiki) + if err != nil { + return err + } + if status.CommitSha != "" { + indexer.Delete(ctx, repoID, optional.Some(isWiki)) + } + // ignore empty wikis return nil } @@ -264,8 +273,8 @@ func Init() { } // UpdateRepoIndexer update a repository's entries in the indexer -func UpdateRepoIndexer(repo *repo_model.Repository) { - indexData := &internal.IndexerData{RepoID: repo.ID} +func UpdateRepoIndexer(repo *repo_model.Repository, isWiki bool) { + indexData := &internal.IndexerData{RepoID: repo.ID, IsWiki: isWiki} if err := indexerQueue.Push(indexData); err != nil { log.Error("Update repo index data %v failed: %v", indexData, err) } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 861d2c575cf47..3a5e3db3f9dbf 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" 
"code.gitea.io/gitea/modules/indexer/code/internal" + "code.gitea.io/gitea/modules/optional" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -93,7 +94,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }) } - assert.NoError(t, indexer.Delete(context.Background(), repoID)) + assert.NoError(t, indexer.Delete(context.Background(), repoID, optional.Some(false))) }) } diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index eae217aa4a45f..0aa145433161c 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -17,7 +17,7 @@ import ( type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, isWiki bool, sha string, changes *RepoChanges) error - Delete(ctx context.Context, repoID int64) error + Delete(ctx context.Context, repoID int64, isWiki optional.Option[bool]) error Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) } @@ -48,7 +48,7 @@ func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, i return fmt.Errorf("indexer is not ready") } -func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { +func (d *dummyIndexer) Delete(ctx context.Context, repoID int64, isWiki optional.Option[bool]) error { return fmt.Errorf("indexer is not ready") } diff --git a/routers/web/repo/setting/setting.go b/routers/web/repo/setting/setting.go index e045e3b8dcc09..261ca1f10e1bc 100644 --- a/routers/web/repo/setting/setting.go +++ b/routers/web/repo/setting/setting.go @@ -676,7 +676,13 @@ func SettingsPost(ctx *context.Context) { ctx.Error(http.StatusForbidden) return } - code.UpdateRepoIndexer(ctx.Repo.Repository) + code.UpdateRepoIndexer(ctx.Repo.Repository, false) + case "wiki": + if !setting.Indexer.RepoIndexerEnabled { + ctx.Error(http.StatusForbidden) + return + } + code.UpdateRepoIndexer(ctx.Repo.Repository, 
true) default: ctx.NotFound("", nil) return diff --git a/services/indexer/notify.go b/services/indexer/notify.go index f1e21a2d40ed1..4e25a4b6d9960 100644 --- a/services/indexer/notify.go +++ b/services/indexer/notify.go @@ -70,14 +70,14 @@ func (r *indexerNotifier) DeleteComment(ctx context.Context, doer *user_model.Us func (r *indexerNotifier) DeleteRepository(ctx context.Context, doer *user_model.User, repo *repo_model.Repository) { issue_indexer.DeleteRepoIssueIndexer(ctx, repo.ID) if setting.Indexer.RepoIndexerEnabled { - code_indexer.UpdateRepoIndexer(repo) + code_indexer.UpdateRepoIndexer(repo, false) } } func (r *indexerNotifier) MigrateRepository(ctx context.Context, doer, u *user_model.User, repo *repo_model.Repository) { issue_indexer.UpdateRepoIndexer(ctx, repo.ID) if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { - code_indexer.UpdateRepoIndexer(repo) + code_indexer.UpdateRepoIndexer(repo, false) } if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) @@ -90,7 +90,7 @@ func (r *indexerNotifier) PushCommits(ctx context.Context, pusher *user_model.Us } if setting.Indexer.RepoIndexerEnabled && opts.RefFullName.BranchName() == repo.DefaultBranch { - code_indexer.UpdateRepoIndexer(repo) + code_indexer.UpdateRepoIndexer(repo, false) } if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) @@ -103,7 +103,7 @@ func (r *indexerNotifier) SyncPushCommits(ctx context.Context, pusher *user_mode } if setting.Indexer.RepoIndexerEnabled && opts.RefFullName.BranchName() == repo.DefaultBranch { - code_indexer.UpdateRepoIndexer(repo) + code_indexer.UpdateRepoIndexer(repo, false) } if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) @@ -112,7 +112,7 @@ func (r *indexerNotifier) SyncPushCommits(ctx context.Context, pusher 
*user_mode func (r *indexerNotifier) ChangeDefaultBranch(ctx context.Context, repo *repo_model.Repository) { if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { - code_indexer.UpdateRepoIndexer(repo) + code_indexer.UpdateRepoIndexer(repo, false) } if err := stats_indexer.UpdateRepoIndexer(repo); err != nil { log.Error("stats_indexer.UpdateRepoIndexer(%d) failed: %v", repo.ID, err) diff --git a/services/wiki/wiki.go b/services/wiki/wiki.go index 1b921a44bdbc5..835f378bd9101 100644 --- a/services/wiki/wiki.go +++ b/services/wiki/wiki.go @@ -18,8 +18,10 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/log" repo_module "code.gitea.io/gitea/modules/repository" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/sync" "code.gitea.io/gitea/modules/util" asymkey_service "code.gitea.io/gitea/services/asymkey" @@ -45,6 +47,11 @@ func InitWiki(ctx context.Context, repo *repo_model.Repository) error { } else if _, _, err = git.NewCommand(ctx, "symbolic-ref", "HEAD").AddDynamicArguments(git.BranchPrefix + repo.DefaultWikiBranch).RunStdString(&git.RunOpts{Dir: repo.WikiPath()}); err != nil { return fmt.Errorf("unable to set default wiki branch to %q: %w", repo.DefaultWikiBranch, err) } + + if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + code_indexer.UpdateRepoIndexer(repo, true) + } + return nil } @@ -229,6 +236,10 @@ func updateWikiPage(ctx context.Context, doer *user_model.User, repo *repo_model return fmt.Errorf("Push: %w", err) } + if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + code_indexer.UpdateRepoIndexer(repo, true) + } + return nil } @@ -347,6 +358,10 @@ func DeleteWikiPage(ctx context.Context, doer *user_model.User, repo *repo_model return fmt.Errorf("Push: %w", err) } + if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + 
code_indexer.UpdateRepoIndexer(repo, true) + } + return nil } @@ -357,6 +372,11 @@ func DeleteWiki(ctx context.Context, repo *repo_model.Repository) error { } system_model.RemoveAllWithNotice(ctx, "Delete repository wiki", repo.WikiPath()) + + if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + code_indexer.UpdateRepoIndexer(repo, true) + } + return nil } @@ -364,7 +384,7 @@ func ChangeDefaultWikiBranch(ctx context.Context, repo *repo_model.Repository, n if !git.IsValidRefPattern(newBranch) { return fmt.Errorf("invalid branch name: %s", newBranch) } - return db.WithTx(ctx, func(ctx context.Context) error { + if err := db.WithTx(ctx, func(ctx context.Context) error { repo.DefaultWikiBranch = newBranch if err := repo_model.UpdateRepositoryCols(ctx, repo, "default_wiki_branch"); err != nil { return fmt.Errorf("unable to update database: %w", err) @@ -391,5 +411,12 @@ func ChangeDefaultWikiBranch(ctx context.Context, repo *repo_model.Repository, n return fmt.Errorf("unable to rename default branch: %w", err) } return nil - }) + }); err != nil { + return err + } + + if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + code_indexer.UpdateRepoIndexer(repo, true) + } + return nil } diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index cf199e98c2895..d10eaad5df2df 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -58,6 +58,6 @@ func testSearch(t *testing.T, url string, expected []string) { assert.EqualValues(t, expected, filenames) } -func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) { - op(repo) +func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository, bool)) { + op(repo, false) } From 8353a4cb58b95c000076ae5061f3eae0dcfc6c93 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 02:30:34 +0100 Subject: [PATCH 12/14] add trace log --- modules/indexer/code/indexer.go | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 1811246a27e41..f93214c7c7952 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -277,6 +277,8 @@ func UpdateRepoIndexer(repo *repo_model.Repository, isWiki bool) { indexData := &internal.IndexerData{RepoID: repo.ID, IsWiki: isWiki} if err := indexerQueue.Push(indexData); err != nil { log.Error("Update repo index data %v failed: %v", indexData, err) + } else { + log.Trace("Push repo indexer task repo: %d (isWiki=%v)", repo.ID, isWiki) } } From d73a9c8dfe29a9b2cadb8d3fa3080a9caf95d3a2 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 02:41:28 +0100 Subject: [PATCH 13/14] make update index on wiki work --- services/wiki/wiki.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/services/wiki/wiki.go b/services/wiki/wiki.go index 835f378bd9101..2aafffea96538 100644 --- a/services/wiki/wiki.go +++ b/services/wiki/wiki.go @@ -48,10 +48,6 @@ func InitWiki(ctx context.Context, repo *repo_model.Repository) error { return fmt.Errorf("unable to set default wiki branch to %q: %w", repo.DefaultWikiBranch, err) } - if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { - code_indexer.UpdateRepoIndexer(repo, true) - } - return nil } @@ -93,6 +89,12 @@ func updateWikiPage(ctx context.Context, doer *user_model.User, repo *repo_model return err } + defer func() { + if setting.Indexer.RepoIndexerEnabled { + code_indexer.UpdateRepoIndexer(repo, true) + } + }() + if err = validateWebPath(newWikiName); err != nil { return err } @@ -236,10 +238,6 @@ func updateWikiPage(ctx context.Context, doer *user_model.User, repo *repo_model return fmt.Errorf("Push: %w", err) } - if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { - code_indexer.UpdateRepoIndexer(repo, true) - } - return nil } @@ -261,6 +259,12 @@ func DeleteWikiPage(ctx context.Context, doer *user_model.User, repo 
*repo_model return err } + defer func() { + if setting.Indexer.RepoIndexerEnabled { + code_indexer.UpdateRepoIndexer(repo, true) + } + }() + wikiWorkingPool.CheckIn(fmt.Sprint(repo.ID)) defer wikiWorkingPool.CheckOut(fmt.Sprint(repo.ID)) @@ -358,10 +362,6 @@ func DeleteWikiPage(ctx context.Context, doer *user_model.User, repo *repo_model return fmt.Errorf("Push: %w", err) } - if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { - code_indexer.UpdateRepoIndexer(repo, true) - } - return nil } @@ -373,7 +373,7 @@ func DeleteWiki(ctx context.Context, repo *repo_model.Repository) error { system_model.RemoveAllWithNotice(ctx, "Delete repository wiki", repo.WikiPath()) - if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + if setting.Indexer.RepoIndexerEnabled { code_indexer.UpdateRepoIndexer(repo, true) } @@ -415,7 +415,7 @@ func ChangeDefaultWikiBranch(ctx context.Context, repo *repo_model.Repository, n return err } - if setting.Indexer.RepoIndexerEnabled && !repo.IsEmpty { + if setting.Indexer.RepoIndexerEnabled { code_indexer.UpdateRepoIndexer(repo, true) } return nil From d04e8202955faf3b37650f5558df59d9f1e06e0f Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Sat, 16 Mar 2024 19:50:37 +0100 Subject: [PATCH 14/14] fix lint --- modules/indexer/code/indexer.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index f93214c7c7952..1eb4b71542d56 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -62,7 +62,9 @@ func index(ctx context.Context, indexer internal.Indexer, repoID int64, isWiki b return err } if status.CommitSha != "" { - indexer.Delete(ctx, repoID, optional.Some(isWiki)) + if err := indexer.Delete(ctx, repoID, optional.Some(isWiki)); err != nil { + return err + } } // ignore empty wikis return nil