diff --git a/models/fixtures/repo_unit.yml b/models/fixtures/repo_unit.yml index 8a22db0445..f6b6252da1 100644 --- a/models/fixtures/repo_unit.yml +++ b/models/fixtures/repo_unit.yml @@ -712,3 +712,24 @@ type: 3 config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}" created_unix: 946684810 + +- + id: 108 + repo_id: 62 + type: 1 + config: "{}" + created_unix: 946684810 + +- + id: 109 + repo_id: 62 + type: 2 + config: "{\"EnableTimetracker\":true,\"AllowOnlyContributorsToTrackTime\":true}" + created_unix: 946684810 + +- + id: 110 + repo_id: 62 + type: 3 + config: "{\"IgnoreWhitespaceConflicts\":false,\"AllowMerge\":true,\"AllowRebase\":true,\"AllowRebaseMerge\":true,\"AllowSquash\":true}" + created_unix: 946684810 diff --git a/models/fixtures/repository.yml b/models/fixtures/repository.yml index e141593f41..b7970cb7c8 100644 --- a/models/fixtures/repository.yml +++ b/models/fixtures/repository.yml @@ -1768,3 +1768,34 @@ size: 0 is_fsck_enabled: true close_issues_via_commit_in_any_branch: false + +- + id: 62 + owner_id: 42 + owner_name: org42 + lower_name: search-by-path + name: search-by-path + default_branch: master + num_watches: 0 + num_stars: 0 + num_forks: 0 + num_issues: 0 + num_closed_issues: 0 + num_pulls: 0 + num_closed_pulls: 0 + num_milestones: 0 + num_closed_milestones: 0 + num_projects: 0 + num_closed_projects: 0 + is_private: false + is_empty: false + is_archived: false + is_mirror: false + status: 0 + is_fork: false + fork_id: 0 + is_template: false + template_id: 0 + size: 0 + is_fsck_enabled: true + close_issues_via_commit_in_any_branch: false diff --git a/models/fixtures/user.yml b/models/fixtures/user.yml index 8504d88ce5..c0296deec5 100644 --- a/models/fixtures/user.yml +++ b/models/fixtures/user.yml @@ -1517,3 +1517,40 @@ repo_admin_change_team_access: false theme: "" keep_activity_private: false + +- + id: 42 + lower_name: org42 + name: org42 + full_name: Org42 + email: org42@example.com + keep_email_private: false + email_notifications_preference: onmention + passwd: ZogKvWdyEx:password + passwd_hash_algo: dummy + must_change_password: false + login_source: 0 + login_name: org42 + type: 1 + salt: ZogKvWdyEx + max_repo_creation: -1 + is_active: false + is_admin: false + is_restricted: false + allow_git_hook: false + allow_import_local: false + allow_create_organization: true + prohibit_login: false + avatar: avatar42 + avatar_email: org42@example.com + use_custom_avatar: false + num_followers: 0 + num_following: 0 + num_stars: 0 + num_repos: 1 + num_teams: 0 + num_members: 0 + visibility: 0 + repo_admin_change_team_access: false + theme: "" + keep_activity_private: false diff --git a/models/repo/repo_list_test.go b/models/repo/repo_list_test.go index 88cfcde620..ca6007f6c7 100644 --- a/models/repo/repo_list_test.go +++ b/models/repo/repo_list_test.go @@ -138,12 +138,12 @@ func getTestCases() []struct { { name: "AllPublic/PublicRepositoriesOfUserIncludingCollaborative", opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, AllPublic: true, Template: optional.Some(false)}, - count: 33, + count: 34, }, { name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborative", opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, Private: true, AllPublic: true, AllLimited: true, Template: optional.Some(false)}, - count: 38, + count: 39, }, { name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborativeByName", @@ -158,7 +158,7 @@ func getTestCases() []struct { { name: "AllPublic/PublicRepositoriesOfOrganization", opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 17, AllPublic: true, Collaborate: optional.Some(false), Template: optional.Some(false)}, - count: 33, + count: 34, }, { name: "AllTemplates", diff --git a/models/user/user_test.go b/models/user/user_test.go index 67efb3859f..bc1abc6451 100644 --- a/models/user/user_test.go +++ b/models/user/user_test.go @@ -92,7 +92,10 @@ func TestSearchUsers(t *testing.T) { testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 4, PageSize: 2}}, []int64{26, 41}) - testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 5, PageSize: 2}}, + testOrgSuccess(&user_model.SearchUserOptions{OrderBy: "id ASC", ListOptions: db.ListOptions{Page: 5, PageSize: 2}}, + []int64{42}) + + testOrgSuccess(&user_model.SearchUserOptions{ListOptions: db.ListOptions{Page: 6, PageSize: 2}}, []int64{}) // test users diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index c17f56d3cf..90e5e62bcb 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" @@ -53,6 +54,7 @@ type RepoIndexerData struct { RepoID int64 CommitID string Content string + Filename string Language string UpdatedAt time.Time } @@ -64,8 +66,10 @@ func (d *RepoIndexerData) Type() string { const ( repoIndexerAnalyzer = "repoIndexerAnalyzer" + filenameIndexerAnalyzer = "filenameIndexerAnalyzer" + filenameIndexerTokenizer = "filenameIndexerTokenizer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 6 + repoIndexerLatestVersion = 7 ) // generateBleveIndexMapping generates a bleve index mapping for the repo indexer @@ -79,6 +83,11 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { textFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("Content", textFieldMapping) + fileNamedMapping := bleve.NewTextFieldMapping() + fileNamedMapping.IncludeInAll = false + fileNamedMapping.Analyzer = filenameIndexerAnalyzer + docMapping.AddFieldMappingsAt("Filename", fileNamedMapping) + termFieldMapping := bleve.NewTextFieldMapping() termFieldMapping.IncludeInAll = false termFieldMapping.Analyzer = analyzer_keyword.Name @@ -90,6 +99,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping) mapping := bleve.NewIndexMapping() + if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { return nil, err } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{ @@ -100,6 +110,16 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { }); err != nil { return nil, err } + + if err := mapping.AddCustomAnalyzer(filenameIndexerAnalyzer, map[string]any{ + "type": analyzer_custom.Name, + "char_filters": []string{}, + "tokenizer": unicode.Name, + "token_filters": []string{unicodeNormalizeName, path_filter.Name, lowercase.Name}, + }); err != nil { + return nil, err + } + mapping.DefaultAnalyzer = repoIndexerAnalyzer mapping.AddDocumentMapping(repoIndexerDocType, docMapping) mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) @@ -174,6 +194,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return batch.Index(id, &RepoIndexerData{ RepoID: repo.ID, CommitID: commitSha, + Filename: update.Filename, Content: string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), Language: analyze.GetCodeLanguage(update.Filename, fileContents), UpdatedAt: time.Now().UTC(), @@ -240,14 +261,19 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int keywordQuery query.Query ) - phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) - phraseQuery.FieldVal = "Content" - phraseQuery.Analyzer = repoIndexerAnalyzer - keywordQuery = phraseQuery + pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword)) + pathQuery.FieldVal = "Filename" + pathQuery.SetBoost(10) + + contentQuery := bleve.NewMatchQuery(opts.Keyword) + contentQuery.FieldVal = "Content" + if opts.IsKeywordFuzzy { - phraseQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) } + keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery) + if len(opts.RepoIDs) > 0 { repoQueries := make([]query.Query, 0, len(opts.RepoIDs)) for _, repoID := range opts.RepoIDs { @@ -277,7 +303,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int from, pageSize := opts.GetSkipTake() searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) - searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} + searchRequest.Fields = []string{"Content", "Filename", "RepoID", "Language", "CommitID", "UpdatedAt"} searchRequest.IncludeLocations = true if len(opts.Language) == 0 { @@ -307,6 +333,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int endIndex = locationEnd } } + if len(hit.Locations["Filename"]) > 0 { + startIndex, endIndex = internal.FilenameMatchIndexPos(hit.Fields["Content"].(string)) + } + language := hit.Fields["Language"].(string) var updatedUnix timeutil.TimeStamp if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil { diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go new file mode 100644 index 0000000000..107e0da109 --- /dev/null +++ b/modules/indexer/code/bleve/token/path/path.go @@ -0,0 +1,101 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package path + +import ( + "slices" + "strings" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const ( + Name = "gitea/path" +) + +type TokenFilter struct{} + +func NewTokenFilter() *TokenFilter { + return &TokenFilter{} +} + +func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) { + return NewTokenFilter(), nil +} + +func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream { + if len(input) == 1 { + // if there is only one token, we dont need to generate the reversed chain + return generatePathTokens(input, false) + } + + normal := generatePathTokens(input, false) + reversed := generatePathTokens(input, true) + + return append(normal, reversed...) +} + +// Generates path tokens from the input tokens. +// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component +// in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md). +// +// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful +// to efficiently search for filenames without supplying the fullpath. +func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream { + terms := make([]string, 0, len(input)) + longestTerm := 0 + + if reversed { + slices.Reverse(input) + } + + for i := 0; i < len(input); i++ { + var sb strings.Builder + sb.WriteString(string(input[0].Term)) + + for j := 1; j < i; j++ { + sb.WriteString("/") + sb.WriteString(string(input[j].Term)) + } + + term := sb.String() + + if longestTerm < len(term) { + longestTerm = len(term) + } + + terms = append(terms, term) + } + + output := make(analysis.TokenStream, 0, len(terms)) + + for _, term := range terms { + var start, end int + + if reversed { + start = 0 + end = len(term) + } else { + start = longestTerm - len(term) + end = longestTerm + } + + token := analysis.Token{ + Position: 1, + Start: start, + End: end, + Type: analysis.AlphaNumeric, + Term: []byte(term), + } + + output = append(output, &token) + } + + return output +} + +func init() { + registry.RegisterTokenFilter(Name, TokenFilterConstructor) +} diff --git a/modules/indexer/code/bleve/token/path/path_test.go b/modules/indexer/code/bleve/token/path/path_test.go new file mode 100644 index 0000000000..cc52021ef7 --- /dev/null +++ b/modules/indexer/code/bleve/token/path/path_test.go @@ -0,0 +1,76 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package path + +import ( + "fmt" + "testing" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" + "github.com/stretchr/testify/assert" +) + +type Scenario struct { + Input string + Tokens []string +} + +func TestTokenFilter(t *testing.T) { + scenarios := []struct { + Input string + Terms []string + }{ + { + Input: "Dockerfile", + Terms: []string{"Dockerfile"}, + }, + { + Input: "Dockerfile.rootless", + Terms: []string{"Dockerfile.rootless"}, + }, + { + Input: "a/b/c/Dockerfile.rootless", + Terms: []string{"a", "a/b", "a/b/c", "a/b/c/Dockerfile.rootless", "Dockerfile.rootless", "Dockerfile.rootless/c", "Dockerfile.rootless/c/b", "Dockerfile.rootless/c/b/a"}, + }, + { + Input: "", + Terms: []string{}, + }, + } + + for _, scenario := range scenarios { + t.Run(fmt.Sprintf("ensure terms of '%s'", scenario.Input), func(t *testing.T) { + terms := extractTerms(scenario.Input) + + assert.Len(t, terms, len(scenario.Terms)) + + for _, term := range terms { + assert.Contains(t, scenario.Terms, term) + } + }) + } +} + +func extractTerms(input string) []string { + tokens := tokenize(input) + filteredTokens := filter(tokens) + terms := make([]string, 0, len(filteredTokens)) + + for _, token := range filteredTokens { + terms = append(terms, string(token.Term)) + } + + return terms +} + +func filter(input analysis.TokenStream) analysis.TokenStream { + filter := NewTokenFilter() + return filter.Filter(input) +} + +func tokenize(input string) analysis.TokenStream { + tokenizer := unicode.NewUnicodeTokenizer() + return tokenizer.Tokenize([]byte(input)) +} diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 5c01034450..669a1bafcc 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -30,7 +30,7 @@ import ( ) const ( - esRepoIndexerLatestVersion = 1 + esRepoIndexerLatestVersion = 2 // multi-match-types, currently only 2 types are used // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types esMultiMatchTypeBestFields = "best_fields" @@ -57,12 +57,50 @@ func NewIndexer(url, indexerName string) *Indexer { const ( defaultMapping = `{ + "settings": { + "analysis": { + "analyzer": { + "filename_path_analyzer": { + "tokenizer": "path_tokenizer" + }, + "reversed_filename_path_analyzer": { + "tokenizer": "reversed_path_tokenizer" + } + }, + "tokenizer": { + "path_tokenizer": { + "type": "path_hierarchy", + "delimiter": "/" + }, + "reversed_path_tokenizer": { + "type": "path_hierarchy", + "delimiter": "/", + "reverse": true + } + } + } + }, "mappings": { "properties": { "repo_id": { "type": "long", "index": true }, + "filename": { + "type": "text", + "term_vector": "with_positions_offsets", + "index": true, + "fields": { + "path": { + "type": "text", + "analyzer": "reversed_filename_path_analyzer" + }, + "path_reversed": { + "type": "text", + "analyzer": "filename_path_analyzer" + } + } + }, "content": { "type": "text", "term_vector": "with_positions_offsets", @@ -136,6 +174,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro Id(id). Doc(map[string]any{ "repo_id": repo.ID, + "filename": update.Filename, "content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), "commit_id": sha, "language": analyze.GetCodeLanguage(update.Filename, fileContents), @@ -231,11 +270,11 @@ func (b *Indexer) doDelete(ctx context.Context, repoID int64) error { return err } -// indexPos find words positions for start and the following end on content. It will +// contentMatchIndexPos find words positions for start and the following end on content. It will // return the beginning position of the first start and the ending position of the // first end following the start string. // If not found any of the positions, it will return -1, -1. -func indexPos(content, start, end string) (int, int) { +func contentMatchIndexPos(content, start, end string) (int, int) { startIdx := strings.Index(content, start) if startIdx < 0 { return -1, -1 @@ -244,22 +283,29 @@ func indexPos(content, start, end string) (int, int) { if endIdx < 0 { return -1, -1 } - return startIdx, startIdx + len(start) + endIdx + len(end) + return startIdx, (startIdx + len(start) + endIdx + len(end)) - 9 // remove the length since we give Content the original data } func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { hits := make([]*internal.SearchResult, 0, pageSize) for _, hit := range searchResult.Hits.Hits { + repoID, fileName := internal.ParseIndexerID(hit.Id) + res := make(map[string]any) + if err := json.Unmarshal(hit.Source, &res); err != nil { + return 0, nil, nil, err + } + // FIXME: There is no way to get the position the keyword on the content currently on the same request. // So we get it from content, this may made the query slower. See // https://discuss.elastic.co/t/fetching-position-of-keyword-in-matched-document/94291 var startIndex, endIndex int - c, ok := hit.Highlight["content"] - if ok && len(c) > 0 { + if c, ok := hit.Highlight["filename"]; ok && len(c) > 0 { + startIndex, endIndex = internal.FilenameMatchIndexPos(res["content"].(string)) + } else if c, ok := hit.Highlight["content"]; ok && len(c) > 0 { // FIXME: Since the highlighting content will include and for the keywords, // now we should find the positions. But how to avoid html content which contains the // and tags? If elastic search has handled that? - startIndex, endIndex = indexPos(c[0], "", "") + startIndex, endIndex = contentMatchIndexPos(c[0], "", "") if startIndex == -1 { panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0])) } @@ -267,12 +313,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) panic(fmt.Sprintf("2===%#v", hit.Highlight)) } - repoID, fileName := internal.ParseIndexerID(hit.Id) - res := make(map[string]any) - if err := json.Unmarshal(hit.Source, &res); err != nil { - return 0, nil, nil, err - } - language := res["language"].(string) hits = append(hits, &internal.SearchResult{ @@ -283,7 +323,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)), Language: language, StartIndex: startIndex, - EndIndex: endIndex - 9, // remove the length since we give Content the original data + EndIndex: endIndex, Color: enry.GetColor(language), }) } @@ -315,7 +355,10 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int searchType = esMultiMatchTypeBestFields } - kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType) + kwQuery := elastic.NewBoolQuery().Should( + elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType), + elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix), + ) query := elastic.NewBoolQuery() query = query.Must(kwQuery) if len(opts.RepoIDs) > 0 { @@ -341,6 +384,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int Highlight( elastic.NewHighlight(). Field("content"). + Field("filename"). NumOfFragments(0). // return all highting content on fragments HighlighterType("fvh"), ). @@ -373,6 +417,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int Highlight( elastic.NewHighlight(). Field("content"). + Field("filename"). NumOfFragments(0). // return all highting content on fragments HighlighterType("fvh"), ). diff --git a/modules/indexer/code/elasticsearch/elasticsearch_test.go b/modules/indexer/code/elasticsearch/elasticsearch_test.go index c6ba93e76d..a6d2af92b2 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch_test.go +++ b/modules/indexer/code/elasticsearch/elasticsearch_test.go @@ -10,7 +10,7 @@ import ( ) func TestIndexPos(t *testing.T) { - startIdx, endIdx := indexPos("test index start and end", "start", "end") + startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end") assert.EqualValues(t, 11, startIdx) - assert.EqualValues(t, 24, endIdx) + assert.EqualValues(t, 15, endIdx) } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 8975c5ce40..5b33528dcd 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -6,6 +6,7 @@ package code import ( "context" "os" + "slices" "testing" "code.gitea.io/gitea/models/db" @@ -20,53 +21,166 @@ import ( _ "code.gitea.io/gitea/models/activities" "github.com/stretchr/testify/assert" + + _ "github.com/mattn/go-sqlite3" ) +type codeSearchResult struct { + Filename string + Content string +} + func TestMain(m *testing.M) { unittest.MainTest(m) } func testIndexer(name string, t *testing.T, indexer internal.Indexer) { t.Run(name, func(t *testing.T) { - var repoID int64 = 1 - err := index(git.DefaultContext, indexer, repoID) - assert.NoError(t, err) + assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer)) + keywords := []struct { RepoIDs []int64 Keyword string - IDs []int64 Langs int + Results []codeSearchResult }{ + // Search for an exact match on the contents of a file + // This scenario yields a single result (the file README.md on the repo '1') { RepoIDs: nil, Keyword: "Description", - IDs: []int64{repoID}, Langs: 1, + Results: []codeSearchResult{ + { + Filename: "README.md", + Content: "# repo1\n\nDescription for repo1", + }, + }, }, + // Search for an exact match on the contents of a file within the repo '2'. + // This scenario yields no results { RepoIDs: []int64{2}, Keyword: "Description", - IDs: []int64{}, Langs: 0, }, + // Search for an exact match on the contents of a file + // This scenario yields a single result (the file README.md on the repo '1') { RepoIDs: nil, Keyword: "repo1", - IDs: []int64{repoID}, Langs: 1, + Results: []codeSearchResult{ + { + Filename: "README.md", + Content: "# repo1\n\nDescription for repo1", + }, + }, }, + // Search for an exact match on the contents of a file within the repo '2'. + // This scenario yields no results { RepoIDs: []int64{2}, Keyword: "repo1", - IDs: []int64{}, Langs: 0, }, + // Search for a non-existing term. + // This scenario yields no results { RepoIDs: nil, Keyword: "non-exist", - IDs: []int64{}, Langs: 0, }, + // Search for an exact match on the contents of a file within the repo '62'. + // This scenario yields a single result (the file avocado.md on the repo '62') + { + RepoIDs: []int64{62}, + Keyword: "pineaple", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "avocado.md", + Content: "# repo1\n\npineaple pie of cucumber juice", + }, + }, + }, + // Search for an exact match on the filename within the repo '62'. + // This scenario yields a single result (the file avocado.md on the repo '62') + { + RepoIDs: []int64{62}, + Keyword: "avocado.md", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "avocado.md", + Content: "# repo1\n\npineaple pie of cucumber juice", + }, + }, + }, + // Search for an partial match on the filename within the repo '62'. + // This scenario yields a single result (the file avocado.md on the repo '62') + { + RepoIDs: []int64{62}, + Keyword: "avo", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "avocado.md", + Content: "# repo1\n\npineaple pie of cucumber juice", + }, + }, + }, + // Search for matches on both the contents and the filenames within the repo '62'. + // This scenario yields two results: the first result is baed on the file (cucumber.md) while the second is based on the contents + { + RepoIDs: []int64{62}, + Keyword: "cucumber", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "cucumber.md", + Content: "Salad is good for your health", + }, + { + Filename: "avocado.md", + Content: "# repo1\n\npineaple pie of cucumber juice", + }, + }, + }, + // Search for matches on the filenames within the repo '62'. + // This scenario yields two results (both are based on filename, the first one is an exact match) + { + RepoIDs: []int64{62}, + Keyword: "ham", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "ham.md", + Content: "This is also not cheese", + }, + { + Filename: "potato/ham.md", + Content: "This is not cheese", + }, + }, + }, + // Search for matches on the contents of files within the repo '62'. + // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) + { + RepoIDs: []int64{62}, + Keyword: "This is not cheese", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "potato/ham.md", + Content: "This is not cheese", + }, + { + Filename: "ham.md", + Content: "This is also not cheese", + }, + }, + }, } for _, kw := range keywords { @@ -81,19 +195,37 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { IsKeywordFuzzy: true, }) assert.NoError(t, err) - assert.Len(t, kw.IDs, int(total)) assert.Len(t, langs, kw.Langs) - ids := make([]int64, 0, len(res)) - for _, hit := range res { - ids = append(ids, hit.RepoID) - assert.EqualValues(t, "# repo1\n\nDescription for repo1", hit.Content) + hits := make([]codeSearchResult, 0, len(res)) + + if total > 0 { + assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results") + } + + for _, hit := range res { + hits = append(hits, codeSearchResult{ + Filename: hit.Filename, + Content: hit.Content, + }) + } + + lastIndex := -1 + + for _, expected := range kw.Results { + index := slices.Index(hits, expected) + if index == -1 { + assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits) + } else if lastIndex > index { + assert.Failf(t, "Result is out of order", "The order of %v within %v is wrong", expected, hits) + } else { + lastIndex = index + } } - assert.EqualValues(t, kw.IDs, ids) }) } - assert.NoError(t, indexer.Delete(context.Background(), repoID)) + assert.NoError(t, tearDownRepositoryIndexes(indexer)) }) } @@ -136,3 +268,25 @@ func TestESIndexAndSearch(t *testing.T) { testIndexer("elastic_search", t, indexer) } + +func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error { + for _, repoID := range repositoriesToSearch() { + if err := index(ctx, indexer, repoID); err != nil { + return err + } + } + return nil +} + +func tearDownRepositoryIndexes(indexer internal.Indexer) error { + for _, repoID := range repositoriesToSearch() { + if err := indexer.Delete(context.Background(), repoID); err != nil { + return err + } + } + return nil +} + +func repositoriesToSearch() []int64 { + return []int64{1, 62} +} diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go index 689c4f4584..5b95783d9f 100644 --- a/modules/indexer/code/internal/util.go +++ b/modules/indexer/code/internal/util.go @@ -10,6 +10,10 @@ import ( "code.gitea.io/gitea/modules/log" ) +const ( + filenameMatchNumberOfLines = 7 // Copied from github search +) + func FilenameIndexerID(repoID int64, filename string) string { return internal.Base36(repoID) + "_" + filename } @@ -30,3 +34,17 @@ func FilenameOfIndexerID(indexerID string) string { } return indexerID[index+1:] } + +// Given the contents of file, returns the boundaries of its first seven lines. +func FilenameMatchIndexPos(content string) (int, int) { + count := 1 + for i, c := range content { + if c == '\n' { + count++ + if count == filenameMatchNumberOfLines { + return 0, i + } + } + } + return 0, len(content) +} diff --git a/modules/indexer/internal/bleve/util.go b/modules/indexer/internal/bleve/util.go index a2265f86e6..b426b39bc2 100644 --- a/modules/indexer/internal/bleve/util.go +++ b/modules/indexer/internal/bleve/util.go @@ -11,10 +11,15 @@ import ( "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/index/upsidedown" "github.com/ethantkoenig/rupture" ) +const ( + maxFuzziness = 2 +) + // openIndexer open the index at the specified path, checking for metadata // updates and bleve version updates. If index needs to be created (or // re-created), returns (nil, nil) @@ -48,7 +53,27 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) { return index, 0, nil } +// This method test the GuessFuzzinessByKeyword method. The fuzziness is based on the levenshtein distance and determines how many chars +// may be different on two string and they still be considered equivalent. +// Given a phrasse, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero. func GuessFuzzinessByKeyword(s string) int { + tokenizer := unicode.NewUnicodeTokenizer() + tokens := tokenizer.Tokenize([]byte(s)) + + if len(tokens) > 0 { + fuzziness := maxFuzziness + + for _, token := range tokens { + fuzziness = min(fuzziness, guessFuzzinessByKeyword(string(token.Term))) + } + + return fuzziness + } + + return 0 +} + +func guessFuzzinessByKeyword(s string) int { // according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2 // magic number 4 was chosen to determine the levenshtein distance per each character of a keyword // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot. @@ -57,5 +82,5 @@ func GuessFuzzinessByKeyword(s string) int { return 0 } } - return min(2, len(s)/4) + return min(maxFuzziness, len(s)/4) } diff --git a/modules/indexer/internal/bleve/util_test.go b/modules/indexer/internal/bleve/util_test.go new file mode 100644 index 0000000000..ae0b12c08d --- /dev/null +++ b/modules/indexer/internal/bleve/util_test.go @@ -0,0 +1,45 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package bleve + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBleveGuessFuzzinessByKeyword(t *testing.T) { + scenarios := []struct { + Input string + Fuzziness int // See util.go for the definition of fuzziness in this particular context + }{ + { + Input: "", + Fuzziness: 0, + }, + { + Input: "Avocado", + Fuzziness: 1, + }, + { + Input: "Geschwindigkeit", + Fuzziness: 2, + }, + { + Input: "non-exist", + Fuzziness: 0, + }, + { + Input: "갃갃갃", + Fuzziness: 0, + }, + } + + for _, scenario := range scenarios { + t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) { + assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input)) + }) + } +} diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG b/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/GIT_COLA_MSG @@ -0,0 +1 @@ + diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD b/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD new file mode 100644 index 0000000000..cb089cd89a --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/HEAD @@ -0,0 +1 @@ +ref: refs/heads/master diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/config b/tests/gitea-repositories-meta/org42/search-by-path.git/config new file mode 100644 index 0000000000..07d359d07c --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/config @@ -0,0 +1,4 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = true diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/description b/tests/gitea-repositories-meta/org42/search-by-path.git/description new file mode 100644 index 0000000000..382e2d7f10 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/description @@ -0,0 +1,8 @@ +This repository will be used to test code search. The snippet below shows its directory structure + +. +├── avocado.md +├── cucumber.md +├── ham.md +└── potato + └── ham.md diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive new file mode 100755 index 0000000000..4b3d452abc --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +ORI_DIR=`pwd` +SHELL_FOLDER=$(cd "$(dirname "$0")";pwd) +cd "$ORI_DIR" +for i in `ls "$SHELL_FOLDER/post-receive.d"`; do + sh "$SHELL_FOLDER/post-receive.d/$i" +done \ No newline at end of file diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea new file mode 100755 index 0000000000..43a948da3a --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/post-receive.d/gitea @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" post-receive diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive new file mode 100755 index 0000000000..4127013053 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +ORI_DIR=`pwd` +SHELL_FOLDER=$(cd "$(dirname "$0")";pwd) +cd "$ORI_DIR" +for i in `ls "$SHELL_FOLDER/pre-receive.d"`; do + sh "$SHELL_FOLDER/pre-receive.d/$i" +done \ No newline at end of file diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea new file mode 100755 index 0000000000..49d0940636 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/pre-receive.d/gitea @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" pre-receive diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive new file mode 100755 index 0000000000..af2808b037 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +ORI_DIR=`pwd` +SHELL_FOLDER=$(cd "$(dirname "$0")";pwd) +cd "$ORI_DIR" +for i in `ls "$SHELL_FOLDER/proc-receive.d"`; do + sh "$SHELL_FOLDER/proc-receive.d/$i" +done diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea new file mode 100755 index 0000000000..97521c6211 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/proc-receive.d/gitea @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" proc-receive diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update new file mode 100755 index 0000000000..c186fe4a18 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +ORI_DIR=`pwd` +SHELL_FOLDER=$(cd "$(dirname "$0")";pwd) +cd "$ORI_DIR" +for i in `ls "$SHELL_FOLDER/update.d"`; do + sh "$SHELL_FOLDER/update.d/$i" $1 $2 $3 +done \ No newline at end of file diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea new file mode 100755 index 0000000000..38101c2426 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/hooks/update.d/gitea @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" update $1 $2 $3 diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude b/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude new file mode 100644 index 0000000000..a5196d1be8 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/info/exclude @@ -0,0 +1,6 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. +# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs b/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs new file mode 100644 index 0000000000..6b948c96a8 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/info/refs @@ -0,0 +1,13 @@ +90c1019714259b24fb81711d4416ac0f18667dfa refs/heads/DefaultBranch +985f0301dba5e7b34be866819cd15ad3d8f508ee refs/heads/branch2 +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/develop +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/feature/1 +78fb907e3a3309eae4fe8fef030874cebbf1cd5e refs/heads/home-md-img-check +3731fe53b763859aaf83e703ee731f6b9447ff1e refs/heads/master +62fb502a7172d4453f0322a2cc85bddffa57f07a refs/heads/pr-to-update +4649299398e4d39a5c09eb4f534df6f1e1eb87cc refs/heads/sub-home-md-img-check +3fa2f829675543ecfc16b2891aebe8bf0608a8f4 refs/notes/commits +4a357436d925b5c974181ff12a994538ddc5a269 refs/pull/2/head +5f22f7d0d95d614d25a5b68592adb345a4b5c7fd refs/pull/3/head +62fb502a7172d4453f0322a2cc85bddffa57f07a refs/pull/5/head +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/tags/v1.1 diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master b/tests/gitea-repositories-meta/org42/search-by-path.git/logs/refs/heads/master new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph new file mode 100644 index 0000000000..b38715bb92 Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/commit-graph differ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs new file mode 100644 index 0000000000..b2af8c8378 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/info/packs @@ -0,0 +1,2 @@ +P pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack + diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap new file mode 100644 index 0000000000..1fdef225e8 Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.bitmap differ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx new file mode 100644 index 0000000000..0d930e7499 Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.idx differ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack new file mode 100644 index 0000000000..f1aac1e740 Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.pack differ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev new file mode 100644 index 0000000000..869860ba61 Binary files /dev/null and b/tests/gitea-repositories-meta/org42/search-by-path.git/objects/pack/pack-393dc29256bc27cb2ec73898507df710be7a3cf5.rev differ diff --git a/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs b/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs new file mode 100644 index 0000000000..70e69af1e1 --- /dev/null +++ b/tests/gitea-repositories-meta/org42/search-by-path.git/packed-refs @@ -0,0 +1,14 @@ +# pack-refs with: peeled fully-peeled sorted +90c1019714259b24fb81711d4416ac0f18667dfa refs/heads/DefaultBranch +985f0301dba5e7b34be866819cd15ad3d8f508ee refs/heads/branch2 +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/develop +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/heads/feature/1 +78fb907e3a3309eae4fe8fef030874cebbf1cd5e refs/heads/home-md-img-check +3731fe53b763859aaf83e703ee731f6b9447ff1e refs/heads/master +62fb502a7172d4453f0322a2cc85bddffa57f07a refs/heads/pr-to-update +4649299398e4d39a5c09eb4f534df6f1e1eb87cc refs/heads/sub-home-md-img-check +3fa2f829675543ecfc16b2891aebe8bf0608a8f4 refs/notes/commits +4a357436d925b5c974181ff12a994538ddc5a269 refs/pull/2/head +5f22f7d0d95d614d25a5b68592adb345a4b5c7fd refs/pull/3/head +62fb502a7172d4453f0322a2cc85bddffa57f07a refs/pull/5/head +65f1bf27bc3bf70f64657658635e66094edbcb4d refs/tags/v1.1 diff --git a/tests/integration/api_org_test.go b/tests/integration/api_org_test.go index 70d3a446f7..fff121490c 100644 --- a/tests/integration/api_org_test.go +++ b/tests/integration/api_org_test.go @@ -177,7 +177,7 @@ func TestAPIGetAll(t *testing.T) { var apiOrgList []*api.Organization DecodeJSON(t, resp, &apiOrgList) - assert.Len(t, apiOrgList, 12) + assert.Len(t, apiOrgList, 13) assert.Equal(t, "Limited Org 36", apiOrgList[1].FullName) assert.Equal(t, "limited", apiOrgList[1].Visibility) @@ -186,7 +186,7 @@ func TestAPIGetAll(t *testing.T) { resp = MakeRequest(t, req, http.StatusOK) DecodeJSON(t, resp, &apiOrgList) - assert.Len(t, apiOrgList, 8) + assert.Len(t, apiOrgList, 9) assert.Equal(t, "org 17", apiOrgList[0].FullName) assert.Equal(t, "public", apiOrgList[0].Visibility) } diff --git a/tests/integration/api_repo_test.go b/tests/integration/api_repo_test.go index 716da762e5..93c9ca0920 100644 --- a/tests/integration/api_repo_test.go +++ b/tests/integration/api_repo_test.go @@ -94,9 +94,9 @@ func TestAPISearchRepo(t *testing.T) { }{ { name: "RepositoriesMax50", requestURL: "/api/v1/repos/search?limit=50&private=false", expectedResults: expectedResults{ - nil: {count: 35}, - user: {count: 35}, - user2: {count: 35}, + nil: {count: 36}, + user: {count: 36}, + user2: {count: 36}, }, }, {