Monorepo for Tangled tangled.org

appview/indexer: add indexer for repos #1213

merged opened by oppi.li targeting master from op/nzpklsurrukv
Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:qfpnj4og54vl56wngdriaxug/sh.tangled.repo.pull/3mhpxt77xcl22
+1047
Diff #0
+4
appview/indexer/indexer.go
··· 7 7 "tangled.org/core/appview/db" 8 8 issues_indexer "tangled.org/core/appview/indexer/issues" 9 9 pulls_indexer "tangled.org/core/appview/indexer/pulls" 10 + repos_indexer "tangled.org/core/appview/indexer/repos" 10 11 "tangled.org/core/appview/notify" 11 12 tlog "tangled.org/core/log" 12 13 ) ··· 14 15 type Indexer struct { 15 16 Issues *issues_indexer.Indexer 16 17 Pulls *pulls_indexer.Indexer 18 + Repos *repos_indexer.Indexer 17 19 logger *slog.Logger 18 20 notify.BaseNotifier 19 21 } ··· 22 24 return &Indexer{ 23 25 issues_indexer.NewIndexer("indexes/issues.bleve"), 24 26 pulls_indexer.NewIndexer("indexes/pulls.bleve"), 27 + repos_indexer.NewIndexer("indexes/repos.bleve"), 25 28 logger, 26 29 notify.BaseNotifier{}, 27 30 } ··· 32 35 ctx = tlog.IntoContext(ctx, ix.logger) 33 36 ix.Issues.Init(ctx, db) 34 37 ix.Pulls.Init(ctx, db) 38 + ix.Repos.Init(ctx, db) 35 39 return nil 36 40 }
+9
appview/indexer/notifier.go
··· 73 73 l.Error("failed to index a pr", "err", err) 74 74 } 75 75 } 76 + 77 + func (ix *Indexer) NewRepo(ctx context.Context, repo *models.Repo) { 78 + l := log.FromContext(ctx).With("notifier", "indexer", "repo", repo) 79 + l.Debug("indexing new repo") 80 + err := ix.Repos.Index(ctx, *repo) 81 + if err != nil { 82 + l.Error("failed to index a repo", "err", err) 83 + } 84 + }
+372
appview/indexer/repos/indexer.go
··· 1 + // heavily inspired by gitea's model (basically copy-pasted) 2 + package repos_indexer 3 + 4 + import ( 5 + "context" 6 + "errors" 7 + "log" 8 + "os" 9 + 10 + "github.com/blevesearch/bleve/v2" 11 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 + "github.com/blevesearch/bleve/v2/analysis/token/ngram" 15 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 16 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 17 + "github.com/blevesearch/bleve/v2/index/upsidedown" 18 + "github.com/blevesearch/bleve/v2/mapping" 19 + "github.com/blevesearch/bleve/v2/search/query" 20 + "tangled.org/core/appview/db" 21 + "tangled.org/core/appview/indexer/base36" 22 + bleveutil "tangled.org/core/appview/indexer/bleve" 23 + "tangled.org/core/appview/models" 24 + "tangled.org/core/appview/pagination" 25 + tlog "tangled.org/core/log" 26 + ) 27 + 28 + const ( 29 + repoIndexerAnalyzer = "repoIndexer" 30 + repoIndexerDocType = "repoIndexerDocType" 31 + 32 + unicodeNormalizeName = "unicodeNormalize" 33 + 34 + // Bump this when the index mapping changes to trigger a rebuild. 35 + repoIndexerVersion = 5 36 + ) 37 + 38 + type Indexer struct { 39 + indexer bleve.Index 40 + path string 41 + } 42 + 43 + func NewIndexer(indexDir string) *Indexer { 44 + return &Indexer{ 45 + path: indexDir, 46 + } 47 + } 48 + 49 + // Init initializes the indexer 50 + func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 51 + l := tlog.FromContext(ctx) 52 + existed, err := ix.intialize(ctx) 53 + if err != nil { 54 + log.Fatalln("failed to initialize repo indexer", err) 55 + } 56 + if !existed { 57 + l.Debug("Populating the repo indexer") 58 + err := PopulateIndexer(ctx, ix, e) 59 + if err != nil { 60 + log.Fatalln("failed to populate repo indexer", err) 61 + } 62 + } 63 + 64 + count, _ := ix.indexer.DocCount() 65 + l.Info("Initialized the repo indexer", "docCount", count) 66 + } 67 + 68 + func generateRepoIndexMapping() (mapping.IndexMapping, error) { 69 + mapping := bleve.NewIndexMapping() 70 + docMapping := bleve.NewDocumentMapping() 71 + 72 + textFieldMapping := bleve.NewTextFieldMapping() 73 + textFieldMapping.Store = false 74 + textFieldMapping.IncludeInAll = false 75 + 76 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 77 + keywordFieldMapping.Store = false 78 + keywordFieldMapping.IncludeInAll = false 79 + 80 + // case-insensitive keyword field for language and topics 81 + caseInsensitiveKeywordMapping := bleve.NewTextFieldMapping() 82 + caseInsensitiveKeywordMapping.Store = false 83 + caseInsensitiveKeywordMapping.IncludeInAll = false 84 + caseInsensitiveKeywordMapping.Analyzer = "keyword_lowercase" 85 + 86 + // trigram field for partial repo name matching 87 + trigramFieldMapping := bleve.NewTextFieldMapping() 88 + trigramFieldMapping.Store = false 89 + trigramFieldMapping.IncludeInAll = false 90 + trigramFieldMapping.Analyzer = "trigram" 91 + 92 + // text fields 93 + docMapping.AddFieldMappingsAt("name", textFieldMapping) 94 + docMapping.AddFieldMappingsAt("name_trigram", trigramFieldMapping) 95 + docMapping.AddFieldMappingsAt("description", textFieldMapping) 96 + docMapping.AddFieldMappingsAt("website", textFieldMapping) 97 + docMapping.AddFieldMappingsAt("topics", textFieldMapping) 98 + 99 + // keyword fields 100 + docMapping.AddFieldMappingsAt("language", caseInsensitiveKeywordMapping) 101 + docMapping.AddFieldMappingsAt("topics_exact", caseInsensitiveKeywordMapping) 102 + docMapping.AddFieldMappingsAt("did", keywordFieldMapping) 103 + docMapping.AddFieldMappingsAt("knot", keywordFieldMapping) 104 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 105 + 106 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 107 + "type": unicodenorm.Name, 108 + "form": unicodenorm.NFC, 109 + }) 110 + if err != nil { 111 + return nil, err 112 + } 113 + 114 + err = mapping.AddCustomTokenFilter("edgeNgram3", map[string]any{ 115 + "type": ngram.Name, 116 + "min": 2.0, 117 + "max": 3.0, 118 + }) 119 + if err != nil { 120 + return nil, err 121 + } 122 + 123 + err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{ 124 + "type": custom.Name, 125 + "char_filters": []string{}, 126 + "tokenizer": unicode.Name, 127 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 128 + }) 129 + if err != nil { 130 + return nil, err 131 + } 132 + 133 + err = mapping.AddCustomAnalyzer("keyword_lowercase", map[string]any{ 134 + "type": custom.Name, 135 + "char_filters": []string{}, 136 + "tokenizer": "single", 137 + "token_filters": []string{lowercase.Name}, 138 + }) 139 + if err != nil { 140 + return nil, err 141 + } 142 + 143 + err = mapping.AddCustomAnalyzer("trigram", map[string]any{ 144 + "type": custom.Name, 145 + "char_filters": []string{}, 146 + "tokenizer": "single", 147 + "token_filters": []string{lowercase.Name, "edgeNgram3"}, 148 + }) 149 + if err != nil { 150 + return nil, err 151 + } 152 + 153 + mapping.DefaultAnalyzer = repoIndexerAnalyzer 154 + mapping.AddDocumentMapping(repoIndexerDocType, docMapping) 155 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 156 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 157 + 158 + return mapping, nil 159 + } 160 + 161 + func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 162 + if ix.indexer != nil { 163 + return false, errors.New("indexer is already initialized") 164 + } 165 + 166 + indexer, err := openIndexer(ctx, ix.path, repoIndexerVersion) 167 + if err != nil { 168 + return false, err 169 + } 170 + if indexer != nil { 171 + ix.indexer = indexer 172 + return true, nil 173 + } 174 + 175 + mapping, err := generateRepoIndexMapping() 176 + if err != nil { 177 + return false, err 178 + } 179 + indexer, err = bleve.New(ix.path, mapping) 180 + if err != nil { 181 + return false, err 182 + } 183 + indexer.SetInternal([]byte("mapping_version"), []byte{byte(repoIndexerVersion)}) 184 + 185 + ix.indexer = indexer 186 + 187 + return false, nil 188 + } 189 + 190 + func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) { 191 + l := tlog.FromContext(ctx) 192 + indexer, err := bleve.Open(path) 193 + if err != nil { 194 + if errors.Is(err, upsidedown.IncompatibleVersion) { 195 + l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 196 + return nil, os.RemoveAll(path) 197 + } 198 + return nil, nil 199 + } 200 + 201 + storedVersion, _ := indexer.GetInternal([]byte("mapping_version")) 202 + if storedVersion == nil || int(storedVersion[0]) != version { 203 + l.Info("Indexer mapping version changed, deleting and rebuilding") 204 + indexer.Close() 205 + return nil, os.RemoveAll(path) 206 + } 207 + 208 + return indexer, nil 209 + } 210 + 211 + func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 212 + l := tlog.FromContext(ctx) 213 + count := 0 214 + 215 + err := pagination.IterateAll( 216 + func(page pagination.Page) ([]models.Repo, error) { 217 + return db.GetReposPaginated(e, page) 218 + }, 219 + func(repos []models.Repo) error { 220 + count += len(repos) 221 + return ix.Index(ctx, repos...) 222 + }, 223 + ) 224 + 225 + l.Info("repos indexed", "count", count) 226 + return err 227 + } 228 + 229 + type repoData struct { 230 + ID int64 `json:"id"` 231 + RepoAt string `json:"repo_at"` 232 + Did string `json:"did"` 233 + Name string `json:"name"` 234 + NameTrigram string `json:"name_trigram"` 235 + Description string `json:"description"` 236 + Website string `json:"website"` 237 + Topics []string `json:"topics"` 238 + TopicsExact []string `json:"topics_exact"` 239 + Knot string `json:"knot"` 240 + Language string `json:"language"` 241 + } 242 + 243 + func makeRepoData(repo *models.Repo) *repoData { 244 + return &repoData{ 245 + ID: repo.Id, 246 + RepoAt: repo.RepoAt().String(), 247 + Did: repo.Did, 248 + Name: repo.Name, 249 + NameTrigram: repo.Name, 250 + Description: repo.Description, 251 + Website: repo.Website, 252 + Topics: repo.Topics, 253 + TopicsExact: repo.Topics, 254 + Knot: repo.Knot, 255 + Language: repo.RepoStats.Language, 256 + } 257 + } 258 + 259 + // Type returns the document type, for bleve's mapping.Classifier interface. 260 + func (r *repoData) Type() string { 261 + return repoIndexerDocType 262 + } 263 + 264 + type SearchResult struct { 265 + Hits []int64 266 + Total uint64 267 + } 268 + 269 + const maxBatchSize = 20 270 + 271 + func (ix *Indexer) Index(ctx context.Context, repos ...models.Repo) error { 272 + batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 273 + for _, repo := range repos { 274 + repoData := makeRepoData(&repo) 275 + if err := batch.Index(base36.Encode(repo.Id), repoData); err != nil { 276 + return err 277 + } 278 + } 279 + return batch.Flush() 280 + } 281 + 282 + func (ix *Indexer) Delete(ctx context.Context, repoID int64) error { 283 + return ix.indexer.Delete(base36.Encode(repoID)) 284 + } 285 + 286 + func (ix *Indexer) Search(ctx context.Context, opts models.RepoSearchOptions) (*SearchResult, error) { 287 + var musts []query.Query 288 + var mustNots []query.Query 289 + 290 + for _, keyword := range opts.Keywords { 291 + musts = append(musts, bleve.NewDisjunctionQuery( 292 + bleveutil.MatchAndQuery("name", keyword, repoIndexerAnalyzer, 0), 293 + bleveutil.MatchAndQuery("name_trigram", keyword, "trigram", 0), 294 + bleveutil.MatchAndQuery("description", keyword, repoIndexerAnalyzer, 0), 295 + bleveutil.MatchAndQuery("website", keyword, repoIndexerAnalyzer, 0), 296 + bleveutil.MatchAndQuery("topics", keyword, repoIndexerAnalyzer, 0), 297 + )) 298 + } 299 + 300 + for _, phrase := range opts.Phrases { 301 + musts = append(musts, bleve.NewDisjunctionQuery( 302 + bleveutil.MatchPhraseQuery("name", phrase, repoIndexerAnalyzer), 303 + bleveutil.MatchPhraseQuery("description", phrase, repoIndexerAnalyzer), 304 + bleveutil.MatchPhraseQuery("website", phrase, repoIndexerAnalyzer), 305 + bleveutil.MatchPhraseQuery("topics", phrase, repoIndexerAnalyzer), 306 + )) 307 + } 308 + 309 + for _, keyword := range opts.NegatedKeywords { 310 + mustNots = append(mustNots, bleve.NewDisjunctionQuery( 311 + bleveutil.MatchAndQuery("name", keyword, repoIndexerAnalyzer, 0), 312 + bleveutil.MatchAndQuery("name_trigram", keyword, "trigram", 0), 313 + bleveutil.MatchAndQuery("description", keyword, repoIndexerAnalyzer, 0), 314 + bleveutil.MatchAndQuery("website", keyword, repoIndexerAnalyzer, 0), 315 + bleveutil.MatchAndQuery("topics", keyword, repoIndexerAnalyzer, 0), 316 + )) 317 + } 318 + 319 + for _, phrase := range opts.NegatedPhrases { 320 + mustNots = append(mustNots, bleve.NewDisjunctionQuery( 321 + bleveutil.MatchPhraseQuery("name", phrase, repoIndexerAnalyzer), 322 + bleveutil.MatchPhraseQuery("description", phrase, repoIndexerAnalyzer), 323 + bleveutil.MatchPhraseQuery("website", phrase, repoIndexerAnalyzer), 324 + bleveutil.MatchPhraseQuery("topics", phrase, repoIndexerAnalyzer), 325 + )) 326 + } 327 + 328 + // keyword filters 329 + if opts.Language != "" { 330 + musts = append(musts, bleveutil.MatchAndQuery("language", opts.Language, "keyword_lowercase", 0)) 331 + } 332 + 333 + if opts.Knot != "" { 334 + musts = append(musts, bleveutil.KeywordFieldQuery("knot", opts.Knot)) 335 + } 336 + 337 + if opts.Did != "" { 338 + musts = append(musts, bleveutil.KeywordFieldQuery("did", opts.Did)) 339 + } 340 + 341 + for _, topic := range opts.Topics { 342 + musts = append(musts, bleveutil.MatchAndQuery("topics_exact", topic, "keyword_lowercase", 0)) 343 + } 344 + 345 + for _, topic := range opts.NegatedTopics { 346 + mustNots = append(mustNots, bleveutil.MatchAndQuery("topics_exact", topic, "keyword_lowercase", 0)) 347 + } 348 + 349 + indexerQuery := bleve.NewBooleanQuery() 350 + if len(musts) == 0 { 351 + musts = append(musts, bleve.NewMatchAllQuery()) 352 + } 353 + indexerQuery.AddMust(musts...) 354 + indexerQuery.AddMustNot(mustNots...) 355 + searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 356 + res, err := ix.indexer.SearchInContext(ctx, searchReq) 357 + if err != nil { 358 + return nil, nil 359 + } 360 + ret := &SearchResult{ 361 + Total: res.Total, 362 + Hits: make([]int64, len(res.Hits)), 363 + } 364 + for i, hit := range res.Hits { 365 + id, err := base36.Decode(hit.ID) 366 + if err != nil { 367 + return nil, err 368 + } 369 + ret.Hits[i] = id 370 + } 371 + return ret, nil 372 + }
+639
appview/indexer/repos/indexer_test.go
··· 1 + package repos_indexer 2 + 3 + import ( 4 + "context" 5 + "os" 6 + "testing" 7 + 8 + "github.com/blevesearch/bleve/v2" 9 + "github.com/stretchr/testify/assert" 10 + "github.com/stretchr/testify/require" 11 + "tangled.org/core/appview/models" 12 + "tangled.org/core/appview/pagination" 13 + ) 14 + 15 + func setupTestIndexer(t *testing.T) (*Indexer, func()) { 16 + t.Helper() 17 + 18 + tmpDir, err := os.MkdirTemp("", "repo_indexer_test") 19 + require.NoError(t, err) 20 + 21 + ix := NewIndexer(tmpDir) 22 + 23 + mapping, err := generateRepoIndexMapping() 24 + require.NoError(t, err) 25 + 26 + indexer, err := bleve.New(tmpDir, mapping) 27 + require.NoError(t, err) 28 + ix.indexer = indexer 29 + 30 + cleanup := func() { 31 + ix.indexer.Close() 32 + os.RemoveAll(tmpDir) 33 + } 34 + 35 + return ix, cleanup 36 + } 37 + 38 + func TestBasicIndexingAndSearch(t *testing.T) { 39 + ix, cleanup := setupTestIndexer(t) 40 + defer cleanup() 41 + 42 + ctx := context.Background() 43 + 44 + err := ix.Index(ctx, 45 + models.Repo{ 46 + Id: 1, 47 + Did: "did:plc:alice", 48 + Name: "web-framework", 49 + Knot: "example.com", 50 + Description: "A modern web framework for Go", 51 + Website: "https://example.com/web-framework", 52 + Topics: []string{"web", "framework", "golang"}, 53 + RepoStats: &models.RepoStats{Language: "Go"}, 54 + }, 55 + models.Repo{ 56 + Id: 2, 57 + Did: "did:plc:bob", 58 + Name: "cli-tool", 59 + Knot: "example.com", 60 + Description: "Command line utility for developers", 61 + Website: "", 62 + Topics: []string{"cli", "tool"}, 63 + RepoStats: &models.RepoStats{Language: "Rust"}, 64 + }, 65 + models.Repo{ 66 + Id: 3, 67 + Did: "did:plc:alice", 68 + Name: "javascript-parser", 69 + Knot: "example.com", 70 + Description: "Fast JavaScript parser", 71 + Website: "", 72 + Topics: []string{"javascript", "parser"}, 73 + RepoStats: &models.RepoStats{Language: "JavaScript"}, 74 + }, 75 + ) 76 + require.NoError(t, err) 77 + 78 + // search by name 79 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 80 + Keywords: []string{"framework"}, 81 + Page: pagination.Page{Limit: 10}, 82 + }) 83 + require.NoError(t, err) 84 + assert.Equal(t, uint64(1), result.Total) 85 + assert.Contains(t, result.Hits, int64(1)) 86 + 87 + // search by description 88 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 89 + Keywords: []string{"utility"}, 90 + Page: pagination.Page{Limit: 10}, 91 + }) 92 + require.NoError(t, err) 93 + assert.Equal(t, uint64(1), result.Total) 94 + assert.Contains(t, result.Hits, int64(2)) 95 + 96 + // search by website 97 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 98 + Keywords: []string{"example.com/web-framework"}, 99 + Page: pagination.Page{Limit: 10}, 100 + }) 101 + require.NoError(t, err) 102 + assert.Equal(t, uint64(1), result.Total) 103 + assert.Contains(t, result.Hits, int64(1)) 104 + } 105 + 106 + func TestLanguageFiltering(t *testing.T) { 107 + ix, cleanup := setupTestIndexer(t) 108 + defer cleanup() 109 + 110 + ctx := context.Background() 111 + 112 + err := ix.Index(ctx, 113 + models.Repo{ 114 + Id: 1, 115 + Did: "did:plc:alice", 116 + Name: "go-project", 117 + RepoStats: &models.RepoStats{Language: "Go"}, 118 + }, 119 + models.Repo{ 120 + Id: 2, 121 + Did: "did:plc:bob", 122 + Name: "rust-project", 123 + RepoStats: &models.RepoStats{Language: "Rust"}, 124 + }, 125 + models.Repo{ 126 + Id: 3, 127 + Did: "did:plc:alice", 128 + Name: "another-go-project", 129 + RepoStats: &models.RepoStats{Language: "Go"}, 130 + }, 131 + ) 132 + require.NoError(t, err) 133 + 134 + // filter by go language 135 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 136 + Language: "Go", 137 + Page: pagination.Page{Limit: 10}, 138 + }) 139 + require.NoError(t, err) 140 + assert.Equal(t, uint64(2), result.Total) 141 + assert.Contains(t, result.Hits, int64(1)) 142 + assert.Contains(t, result.Hits, int64(3)) 143 + 144 + // filter by rust language 145 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 146 + Language: "Rust", 147 + Page: pagination.Page{Limit: 10}, 148 + }) 149 + require.NoError(t, err) 150 + assert.Equal(t, uint64(1), result.Total) 151 + assert.Contains(t, result.Hits, int64(2)) 152 + } 153 + 154 + func TestTopicExactMatching(t *testing.T) { 155 + ix, cleanup := setupTestIndexer(t) 156 + defer cleanup() 157 + 158 + ctx := context.Background() 159 + 160 + err := ix.Index(ctx, 161 + models.Repo{ 162 + Id: 1, 163 + Did: "did:plc:alice", 164 + Name: "js-tool", 165 + Topics: []string{"javascript", "tool"}, 166 + RepoStats: &models.RepoStats{}, 167 + }, 168 + models.Repo{ 169 + Id: 2, 170 + Did: "did:plc:bob", 171 + Name: "java-app", 172 + Topics: []string{"java", "application"}, 173 + RepoStats: &models.RepoStats{}, 174 + }, 175 + models.Repo{ 176 + Id: 3, 177 + Did: "did:plc:alice", 178 + Name: "cli-tool", 179 + Topics: []string{"cli", "tool"}, 180 + RepoStats: &models.RepoStats{}, 181 + }, 182 + ) 183 + require.NoError(t, err) 184 + 185 + // exact match for "javascript" topic 186 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 187 + Topics: []string{"javascript"}, 188 + Page: pagination.Page{Limit: 10}, 189 + }) 190 + require.NoError(t, err) 191 + assert.Equal(t, uint64(1), result.Total) 192 + assert.Contains(t, result.Hits, int64(1)) 193 + 194 + // exact match for "tool" topic (should match repos 1 and 3) 195 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 196 + Topics: []string{"tool"}, 197 + Page: pagination.Page{Limit: 10}, 198 + }) 199 + require.NoError(t, err) 200 + assert.Equal(t, uint64(2), result.Total) 201 + assert.Contains(t, result.Hits, int64(1)) 202 + assert.Contains(t, result.Hits, int64(3)) 203 + } 204 + 205 + func TestTopicTextSearch(t *testing.T) { 206 + ix, cleanup := setupTestIndexer(t) 207 + defer cleanup() 208 + 209 + ctx := context.Background() 210 + 211 + err := ix.Index(ctx, 212 + models.Repo{ 213 + Id: 1, 214 + Did: "did:plc:alice", 215 + Name: "js-tool", 216 + Topics: []string{"JavaScript"}, 217 + RepoStats: &models.RepoStats{}, 218 + }, 219 + models.Repo{ 220 + Id: 2, 221 + Did: "did:plc:bob", 222 + Name: "java-app", 223 + Topics: []string{"Java"}, 224 + RepoStats: &models.RepoStats{}, 225 + }, 226 + ) 227 + require.NoError(t, err) 228 + 229 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 230 + Keywords: []string{"Java"}, 231 + Page: pagination.Page{Limit: 10}, 232 + }) 233 + require.NoError(t, err) 234 + assert.Equal(t, uint64(2), result.Total) 235 + assert.Contains(t, result.Hits, int64(1)) 236 + assert.Contains(t, result.Hits, int64(2)) 237 + } 238 + 239 + func TestNegatedFilters(t *testing.T) { 240 + ix, cleanup := setupTestIndexer(t) 241 + defer cleanup() 242 + 243 + ctx := context.Background() 244 + 245 + err := ix.Index(ctx, 246 + models.Repo{ 247 + Id: 1, 248 + Did: "did:plc:alice", 249 + Name: "active-project", 250 + Description: "An active development project", 251 + Topics: []string{"active"}, 252 + RepoStats: &models.RepoStats{Language: "Go"}, 253 + }, 254 + models.Repo{ 255 + Id: 2, 256 + Did: "did:plc:bob", 257 + Name: "archived-project", 258 + Description: "An archived project", 259 + Topics: []string{"archived"}, 260 + RepoStats: &models.RepoStats{Language: "Python"}, 261 + }, 262 + models.Repo{ 263 + Id: 3, 264 + Did: "did:plc:alice", 265 + Name: "another-project", 266 + Description: "Another active project", 267 + Topics: []string{"active"}, 268 + RepoStats: &models.RepoStats{Language: "Go"}, 269 + }, 270 + ) 271 + require.NoError(t, err) 272 + 273 + // exclude archived topic 274 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 275 + NegatedTopics: []string{"archived"}, 276 + Page: pagination.Page{Limit: 10}, 277 + }) 278 + require.NoError(t, err) 279 + assert.Equal(t, uint64(2), result.Total) 280 + assert.Contains(t, result.Hits, int64(1)) 281 + assert.Contains(t, result.Hits, int64(3)) 282 + 283 + // exclude keyword "archived" 284 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 285 + NegatedKeywords: []string{"archived"}, 286 + Page: pagination.Page{Limit: 10}, 287 + }) 288 + require.NoError(t, err) 289 + assert.Equal(t, uint64(2), result.Total) 290 + assert.Contains(t, result.Hits, int64(1)) 291 + assert.Contains(t, result.Hits, int64(3)) 292 + 293 + // exclude phrase 294 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 295 + NegatedPhrases: []string{"archived project"}, 296 + Page: pagination.Page{Limit: 10}, 297 + }) 298 + require.NoError(t, err) 299 + assert.Equal(t, uint64(2), result.Total) 300 + assert.Contains(t, result.Hits, int64(1)) 301 + assert.Contains(t, result.Hits, int64(3)) 302 + } 303 + 304 + func TestPagination(t *testing.T) { 305 + ix, cleanup := setupTestIndexer(t) 306 + defer cleanup() 307 + 308 + ctx := context.Background() 309 + 310 + // index multiple repos 311 + var repos []models.Repo 312 + for i := 1; i <= 25; i++ { 313 + repos = append(repos, models.Repo{ 314 + Id: int64(i), 315 + Did: "did:plc:alice", 316 + Name: "project", 317 + Topics: []string{"test"}, 318 + RepoStats: &models.RepoStats{}, 319 + }) 320 + } 321 + err := ix.Index(ctx, repos...) 322 + require.NoError(t, err) 323 + 324 + // first page 325 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 326 + Topics: []string{"test"}, 327 + Page: pagination.Page{Limit: 10, Offset: 0}, 328 + }) 329 + require.NoError(t, err) 330 + assert.Equal(t, uint64(25), result.Total) 331 + assert.Len(t, result.Hits, 10) 332 + 333 + // second page 334 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 335 + Topics: []string{"test"}, 336 + Page: pagination.Page{Limit: 10, Offset: 10}, 337 + }) 338 + require.NoError(t, err) 339 + assert.Equal(t, uint64(25), result.Total) 340 + assert.Len(t, result.Hits, 10) 341 + 342 + // third page - 5 items 343 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 344 + Topics: []string{"test"}, 345 + Page: pagination.Page{Limit: 10, Offset: 20}, 346 + }) 347 + require.NoError(t, err) 348 + assert.Equal(t, uint64(25), result.Total) 349 + assert.Len(t, result.Hits, 5) 350 + } 351 + 352 + func TestUpdateReindex(t *testing.T) { 353 + ix, cleanup := setupTestIndexer(t) 354 + defer cleanup() 355 + 356 + ctx := context.Background() 357 + 358 + // initial index 359 + err := ix.Index(ctx, models.Repo{ 360 + Id: 1, 361 + Did: "did:plc:alice", 362 + Name: "my-project", 363 + Description: "Initial description", 364 + Topics: []string{"initial"}, 365 + RepoStats: &models.RepoStats{Language: "Go"}, 366 + }) 367 + require.NoError(t, err) 368 + 369 + // search for initial state 370 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 371 + Keywords: []string{"Initial"}, 372 + Page: pagination.Page{Limit: 10}, 373 + }) 374 + require.NoError(t, err) 375 + assert.Equal(t, uint64(1), result.Total) 376 + 377 + // update the repo 378 + err = ix.Index(ctx, models.Repo{ 379 + Id: 1, 380 + Did: "did:plc:alice", 381 + Name: "my-project", 382 + Description: "Updated description", 383 + Topics: []string{"updated"}, 384 + RepoStats: &models.RepoStats{Language: "Rust"}, 385 + }) 386 + require.NoError(t, err) 387 + 388 + // search for old description should return nothing 389 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 390 + Keywords: []string{"Initial"}, 391 + Page: pagination.Page{Limit: 10}, 392 + }) 393 + require.NoError(t, err) 394 + assert.Equal(t, uint64(0), result.Total) 395 + 396 + // search for new description should work 397 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 398 + Keywords: []string{"Updated"}, 399 + Page: pagination.Page{Limit: 10}, 400 + }) 401 + require.NoError(t, err) 402 + assert.Equal(t, uint64(1), result.Total) 403 + 404 + // language should be updated 405 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 406 + Language: "Rust", 407 + Page: pagination.Page{Limit: 10}, 408 + }) 409 + require.NoError(t, err) 410 + assert.Equal(t, uint64(1), result.Total) 411 + } 412 + 413 + func TestEmptyResults(t *testing.T) { 414 + ix, cleanup := setupTestIndexer(t) 415 + defer cleanup() 416 + 417 + ctx := context.Background() 418 + 419 + err := ix.Index(ctx, models.Repo{ 420 + Id: 1, 421 + Did: "did:plc:alice", 422 + Name: "my-project", 423 + RepoStats: &models.RepoStats{}, 424 + }) 425 + require.NoError(t, err) 426 + 427 + // search for non-existent keyword 428 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 429 + Keywords: []string{"nonexistent"}, 430 + Page: pagination.Page{Limit: 10}, 431 + }) 432 + require.NoError(t, err) 433 + assert.Equal(t, uint64(0), result.Total) 434 + assert.Empty(t, result.Hits) 435 + 436 + // search for non-existent language 437 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 438 + Language: "NonexistentLanguage", 439 + Page: pagination.Page{Limit: 10}, 440 + }) 441 + require.NoError(t, err) 442 + assert.Equal(t, uint64(0), result.Total) 443 + assert.Empty(t, result.Hits) 444 + } 445 + 446 + func TestCombinedFilters(t *testing.T) { 447 + ix, cleanup := setupTestIndexer(t) 448 + defer cleanup() 449 + 450 + ctx := context.Background() 451 + 452 + err := ix.Index(ctx, 453 + models.Repo{ 454 + Id: 1, 455 + Did: "did:plc:alice", 456 + Name: "web-server", 457 + Knot: "example.com", 458 + Description: "A web server in Go", 459 + Topics: []string{"web", "server"}, 460 + RepoStats: &models.RepoStats{Language: "Go"}, 461 + }, 462 + models.Repo{ 463 + Id: 2, 464 + Did: "did:plc:bob", 465 + Name: "web-client", 466 + Knot: "example.org", 467 + Description: "A web client in Rust", 468 + Topics: []string{"web", "client"}, 469 + RepoStats: &models.RepoStats{Language: "Rust"}, 470 + }, 471 + models.Repo{ 472 + Id: 3, 473 + Did: "did:plc:alice", 474 + Name: "cli-tool", 475 + Knot: "example.com", 476 + Description: "A CLI tool in Go", 477 + Topics: []string{"cli", "tool"}, 478 + RepoStats: &models.RepoStats{Language: "Go"}, 479 + }, 480 + ) 481 + require.NoError(t, err) 482 + 483 + // combine language + topic + keyword 484 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 485 + Language: "Go", 486 + Topics: []string{"web"}, 487 + Keywords: []string{"server"}, 488 + Page: pagination.Page{Limit: 10}, 489 + }) 490 + require.NoError(t, err) 491 + assert.Equal(t, uint64(1), result.Total) 492 + assert.Contains(t, result.Hits, int64(1)) 493 + 494 + // combine did + language 495 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 496 + Did: "did:plc:alice", 497 + Language: "Go", 498 + Page: pagination.Page{Limit: 10}, 499 + }) 500 + require.NoError(t, err) 501 + assert.Equal(t, uint64(2), result.Total) 502 + assert.Contains(t, result.Hits, int64(1)) 503 + assert.Contains(t, result.Hits, int64(3)) 504 + 505 + // combine knot + language 506 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 507 + Knot: "example.com", 508 + Language: "Go", 509 + Page: pagination.Page{Limit: 10}, 510 + }) 511 + require.NoError(t, err) 512 + assert.Equal(t, uint64(2), result.Total) 513 + assert.Contains(t, result.Hits, int64(1)) 514 + assert.Contains(t, result.Hits, int64(3)) 515 + } 516 + 517 + func TestRepoWithoutLanguage(t *testing.T) { 518 + ix, cleanup := setupTestIndexer(t) 519 + defer cleanup() 520 + 521 + ctx := context.Background() 522 + 523 + err := ix.Index(ctx, 524 + models.Repo{ 525 + Id: 1, 526 + Did: "did:plc:alice", 527 + Name: "project-with-language", 528 + RepoStats: &models.RepoStats{Language: "Go"}, 529 + }, 530 + models.Repo{ 531 + Id: 2, 532 + Did: "did:plc:bob", 533 + Name: "project-without-language", 534 + RepoStats: &models.RepoStats{Language: ""}, 535 + }, 536 + ) 537 + require.NoError(t, err) 538 + 539 + // search without language filter should return both 540 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 541 + Keywords: []string{"project"}, 542 + Page: pagination.Page{Limit: 10}, 543 + }) 544 + require.NoError(t, err) 545 + assert.Equal(t, uint64(2), result.Total) 546 + 547 + // language filter should only return repo with language 548 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 549 + Language: "Go", 550 + Page: pagination.Page{Limit: 10}, 551 + }) 552 + require.NoError(t, err) 553 + assert.Equal(t, uint64(1), result.Total) 554 + assert.Contains(t, result.Hits, int64(1)) 555 + } 556 + 557 + func TestRepoWithoutTopics(t *testing.T) { 558 + ix, cleanup := setupTestIndexer(t) 559 + defer cleanup() 560 + 561 + ctx := context.Background() 562 + 563 + err := ix.Index(ctx, 564 + models.Repo{ 565 + Id: 1, 566 + Did: "did:plc:alice", 567 + Name: "project-with-topics", 568 + Topics: []string{"cli", "tool"}, 569 + RepoStats: &models.RepoStats{}, 570 + }, 571 + models.Repo{ 572 + Id: 2, 573 + Did: "did:plc:bob", 574 + Name: "project-without-topics", 575 + Topics: []string{}, 576 + RepoStats: &models.RepoStats{}, 577 + }, 578 + ) 579 + require.NoError(t, err) 580 + 581 + // topic filter should only return repo with topics 582 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 583 + Topics: []string{"cli"}, 584 + Page: pagination.Page{Limit: 10}, 585 + }) 586 + require.NoError(t, err) 587 + assert.Equal(t, uint64(1), result.Total) 588 + assert.Contains(t, result.Hits, int64(1)) 589 + 590 + // general search should return both 591 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 592 + Keywords: []string{"project"}, 593 + Page: pagination.Page{Limit: 10}, 594 + }) 595 + require.NoError(t, err) 596 + assert.Equal(t, uint64(2), result.Total) 597 + } 598 + 599 + func TestDelete(t *testing.T) { 600 + ix, cleanup := setupTestIndexer(t) 601 + defer cleanup() 602 + 603 + ctx := context.Background() 604 + 605 + err := ix.Index(ctx, 606 + models.Repo{ 607 + Id: 1, 608 + Did: "did:plc:alice", 609 + Name: "to-delete", 610 + RepoStats: &models.RepoStats{}, 611 + }, 612 + models.Repo{ 613 + Id: 2, 614 + Did: "did:plc:bob", 615 + Name: "to-keep", 616 + RepoStats: &models.RepoStats{}, 617 + }, 618 + ) 619 + require.NoError(t, err) 620 + 621 + // verify both exist 622 + result, err := ix.Search(ctx, models.RepoSearchOptions{ 623 + Page: pagination.Page{Limit: 10}, 624 + }) 625 + require.NoError(t, err) 626 + assert.Equal(t, uint64(2), result.Total) 627 + 628 + // delete repo 1 629 + err = ix.Delete(ctx, 1) 630 + require.NoError(t, err) 631 + 632 + // verify only one remains 633 + result, err = ix.Search(ctx, models.RepoSearchOptions{ 634 + Page: pagination.Page{Limit: 10}, 635 + }) 636 + require.NoError(t, err) 637 + assert.Equal(t, uint64(1), result.Total) 638 + assert.Contains(t, result.Hits, int64(2)) 639 + }
+23
appview/models/search.go
··· 53 53 len(o.LabelValues) > 0 || len(o.NegatedLabelValues) > 0 || 54 54 len(o.NegatedKeywords) > 0 || len(o.NegatedPhrases) > 0 55 55 } 56 + 57 + type RepoSearchOptions struct { 58 + Keywords []string // text search across name, description, website, topics 59 + Phrases []string // phrase search 60 + 61 + Language string // exact match on primary language 62 + Knot string // filter by knot domain 63 + Did string // filter by owner DID 64 + Topics []string // exact topic matches 65 + 66 + NegatedKeywords []string 67 + NegatedPhrases []string 68 + NegatedTopics []string 69 + 70 + Page pagination.Page 71 + } 72 + 73 + func (o *RepoSearchOptions) HasSearchFilters() bool { 74 + return len(o.Keywords) > 0 || len(o.Phrases) > 0 || 75 + o.Language != "" || o.Did != "" || 76 + len(o.Topics) > 0 || len(o.NegatedTopics) > 0 || 77 + len(o.NegatedKeywords) > 0 || len(o.NegatedPhrases) > 0 78 + }

History

1 round 0 comments
sign up or login to add to the discussion
oppi.li submitted #0
1 commit
expand
appview/indexer: add indexer for repos
2/3 failed, 1/3 success
expand
expand 0 comments
pull request successfully merged