Signed-off-by: oppiliappan <me@oppi.li>
+1047
Diff
round #0
+4
appview/indexer/indexer.go
+4
appview/indexer/indexer.go
···
7
7
"tangled.org/core/appview/db"
8
8
issues_indexer "tangled.org/core/appview/indexer/issues"
9
9
pulls_indexer "tangled.org/core/appview/indexer/pulls"
10
+
repos_indexer "tangled.org/core/appview/indexer/repos"
10
11
"tangled.org/core/appview/notify"
11
12
tlog "tangled.org/core/log"
12
13
)
···
14
15
// Indexer groups the per-entity search indexers (issues, pulls and repos)
// with the logger they share, and embeds notify.BaseNotifier.
//
// NOTE(review): the constructor in this file builds the struct with a
// positional literal, so the field order below must not be changed.
type Indexer struct {
15
16
Issues *issues_indexer.Indexer
16
17
Pulls *pulls_indexer.Indexer
18
+
Repos *repos_indexer.Indexer
17
19
logger *slog.Logger
18
20
notify.BaseNotifier
19
21
}
···
22
24
return &Indexer{
23
25
issues_indexer.NewIndexer("indexes/issues.bleve"),
24
26
pulls_indexer.NewIndexer("indexes/pulls.bleve"),
27
+
repos_indexer.NewIndexer("indexes/repos.bleve"),
25
28
logger,
26
29
notify.BaseNotifier{},
27
30
}
···
32
35
ctx = tlog.IntoContext(ctx, ix.logger)
33
36
ix.Issues.Init(ctx, db)
34
37
ix.Pulls.Init(ctx, db)
38
+
ix.Repos.Init(ctx, db)
35
39
return nil
36
40
}
+9
appview/indexer/notifier.go
+9
appview/indexer/notifier.go
···
73
73
l.Error("failed to index a pr", "err", err)
74
74
}
75
75
}
76
+
77
+
func (ix *Indexer) NewRepo(ctx context.Context, repo *models.Repo) {
78
+
l := log.FromContext(ctx).With("notifier", "indexer", "repo", repo)
79
+
l.Debug("indexing new repo")
80
+
err := ix.Repos.Index(ctx, *repo)
81
+
if err != nil {
82
+
l.Error("failed to index a repo", "err", err)
83
+
}
84
+
}
+372
appview/indexer/repos/indexer.go
+372
appview/indexer/repos/indexer.go
···
1
+
// heavily inspired by gitea's model (basically copy-pasted)
2
+
package repos_indexer
3
+
4
+
import (
5
+
"context"
6
+
"errors"
7
+
"log"
8
+
"os"
9
+
10
+
"github.com/blevesearch/bleve/v2"
11
+
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12
+
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13
+
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14
+
"github.com/blevesearch/bleve/v2/analysis/token/ngram"
15
+
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
16
+
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
17
+
"github.com/blevesearch/bleve/v2/index/upsidedown"
18
+
"github.com/blevesearch/bleve/v2/mapping"
19
+
"github.com/blevesearch/bleve/v2/search/query"
20
+
"tangled.org/core/appview/db"
21
+
"tangled.org/core/appview/indexer/base36"
22
+
bleveutil "tangled.org/core/appview/indexer/bleve"
23
+
"tangled.org/core/appview/models"
24
+
"tangled.org/core/appview/pagination"
25
+
tlog "tangled.org/core/log"
26
+
)
27
+
28
+
const (
29
+
// repoIndexerAnalyzer is the name of the custom analyzer that is registered
// on the index mapping and used as its default analyzer.
repoIndexerAnalyzer = "repoIndexer"
30
+
// repoIndexerDocType is the document type reported by repoData.Type and the
// key under which the repo document mapping is registered.
repoIndexerDocType = "repoIndexerDocType"
31
+
32
+
// unicodeNormalizeName is the name of the custom NFC unicode-normalization
// token filter.
unicodeNormalizeName = "unicodeNormalize"
33
+
34
+
// Bump this when the index mapping changes to trigger a rebuild.
// The version is persisted and compared as a single byte, so it must stay
// in the range 0-255.
35
+
repoIndexerVersion = 5
36
+
)
37
+
38
+
type Indexer struct {
39
+
indexer bleve.Index
40
+
path string
41
+
}
42
+
43
+
func NewIndexer(indexDir string) *Indexer {
44
+
return &Indexer{
45
+
path: indexDir,
46
+
}
47
+
}
48
+
49
+
// Init initializes the indexer
50
+
func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
51
+
l := tlog.FromContext(ctx)
52
+
existed, err := ix.intialize(ctx)
53
+
if err != nil {
54
+
log.Fatalln("failed to initialize repo indexer", err)
55
+
}
56
+
if !existed {
57
+
l.Debug("Populating the repo indexer")
58
+
err := PopulateIndexer(ctx, ix, e)
59
+
if err != nil {
60
+
log.Fatalln("failed to populate repo indexer", err)
61
+
}
62
+
}
63
+
64
+
count, _ := ix.indexer.DocCount()
65
+
l.Info("Initialized the repo indexer", "docCount", count)
66
+
}
67
+
68
+
func generateRepoIndexMapping() (mapping.IndexMapping, error) {
69
+
mapping := bleve.NewIndexMapping()
70
+
docMapping := bleve.NewDocumentMapping()
71
+
72
+
textFieldMapping := bleve.NewTextFieldMapping()
73
+
textFieldMapping.Store = false
74
+
textFieldMapping.IncludeInAll = false
75
+
76
+
keywordFieldMapping := bleve.NewKeywordFieldMapping()
77
+
keywordFieldMapping.Store = false
78
+
keywordFieldMapping.IncludeInAll = false
79
+
80
+
// case-insensitive keyword field for language and topics
81
+
caseInsensitiveKeywordMapping := bleve.NewTextFieldMapping()
82
+
caseInsensitiveKeywordMapping.Store = false
83
+
caseInsensitiveKeywordMapping.IncludeInAll = false
84
+
caseInsensitiveKeywordMapping.Analyzer = "keyword_lowercase"
85
+
86
+
// trigram field for partial repo name matching
87
+
trigramFieldMapping := bleve.NewTextFieldMapping()
88
+
trigramFieldMapping.Store = false
89
+
trigramFieldMapping.IncludeInAll = false
90
+
trigramFieldMapping.Analyzer = "trigram"
91
+
92
+
// text fields
93
+
docMapping.AddFieldMappingsAt("name", textFieldMapping)
94
+
docMapping.AddFieldMappingsAt("name_trigram", trigramFieldMapping)
95
+
docMapping.AddFieldMappingsAt("description", textFieldMapping)
96
+
docMapping.AddFieldMappingsAt("website", textFieldMapping)
97
+
docMapping.AddFieldMappingsAt("topics", textFieldMapping)
98
+
99
+
// keyword fields
100
+
docMapping.AddFieldMappingsAt("language", caseInsensitiveKeywordMapping)
101
+
docMapping.AddFieldMappingsAt("topics_exact", caseInsensitiveKeywordMapping)
102
+
docMapping.AddFieldMappingsAt("did", keywordFieldMapping)
103
+
docMapping.AddFieldMappingsAt("knot", keywordFieldMapping)
104
+
docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
105
+
106
+
err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
107
+
"type": unicodenorm.Name,
108
+
"form": unicodenorm.NFC,
109
+
})
110
+
if err != nil {
111
+
return nil, err
112
+
}
113
+
114
+
err = mapping.AddCustomTokenFilter("edgeNgram3", map[string]any{
115
+
"type": ngram.Name,
116
+
"min": 2.0,
117
+
"max": 3.0,
118
+
})
119
+
if err != nil {
120
+
return nil, err
121
+
}
122
+
123
+
err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{
124
+
"type": custom.Name,
125
+
"char_filters": []string{},
126
+
"tokenizer": unicode.Name,
127
+
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
128
+
})
129
+
if err != nil {
130
+
return nil, err
131
+
}
132
+
133
+
err = mapping.AddCustomAnalyzer("keyword_lowercase", map[string]any{
134
+
"type": custom.Name,
135
+
"char_filters": []string{},
136
+
"tokenizer": "single",
137
+
"token_filters": []string{lowercase.Name},
138
+
})
139
+
if err != nil {
140
+
return nil, err
141
+
}
142
+
143
+
err = mapping.AddCustomAnalyzer("trigram", map[string]any{
144
+
"type": custom.Name,
145
+
"char_filters": []string{},
146
+
"tokenizer": "single",
147
+
"token_filters": []string{lowercase.Name, "edgeNgram3"},
148
+
})
149
+
if err != nil {
150
+
return nil, err
151
+
}
152
+
153
+
mapping.DefaultAnalyzer = repoIndexerAnalyzer
154
+
mapping.AddDocumentMapping(repoIndexerDocType, docMapping)
155
+
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
156
+
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
157
+
158
+
return mapping, nil
159
+
}
160
+
161
+
func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
162
+
if ix.indexer != nil {
163
+
return false, errors.New("indexer is already initialized")
164
+
}
165
+
166
+
indexer, err := openIndexer(ctx, ix.path, repoIndexerVersion)
167
+
if err != nil {
168
+
return false, err
169
+
}
170
+
if indexer != nil {
171
+
ix.indexer = indexer
172
+
return true, nil
173
+
}
174
+
175
+
mapping, err := generateRepoIndexMapping()
176
+
if err != nil {
177
+
return false, err
178
+
}
179
+
indexer, err = bleve.New(ix.path, mapping)
180
+
if err != nil {
181
+
return false, err
182
+
}
183
+
indexer.SetInternal([]byte("mapping_version"), []byte{byte(repoIndexerVersion)})
184
+
185
+
ix.indexer = indexer
186
+
187
+
return false, nil
188
+
}
189
+
190
+
func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) {
191
+
l := tlog.FromContext(ctx)
192
+
indexer, err := bleve.Open(path)
193
+
if err != nil {
194
+
if errors.Is(err, upsidedown.IncompatibleVersion) {
195
+
l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
196
+
return nil, os.RemoveAll(path)
197
+
}
198
+
return nil, nil
199
+
}
200
+
201
+
storedVersion, _ := indexer.GetInternal([]byte("mapping_version"))
202
+
if storedVersion == nil || int(storedVersion[0]) != version {
203
+
l.Info("Indexer mapping version changed, deleting and rebuilding")
204
+
indexer.Close()
205
+
return nil, os.RemoveAll(path)
206
+
}
207
+
208
+
return indexer, nil
209
+
}
210
+
211
+
func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
212
+
l := tlog.FromContext(ctx)
213
+
count := 0
214
+
215
+
err := pagination.IterateAll(
216
+
func(page pagination.Page) ([]models.Repo, error) {
217
+
return db.GetReposPaginated(e, page)
218
+
},
219
+
func(repos []models.Repo) error {
220
+
count += len(repos)
221
+
return ix.Index(ctx, repos...)
222
+
},
223
+
)
224
+
225
+
l.Info("repos indexed", "count", count)
226
+
return err
227
+
}
228
+
229
+
type repoData struct {
230
+
ID int64 `json:"id"`
231
+
RepoAt string `json:"repo_at"`
232
+
Did string `json:"did"`
233
+
Name string `json:"name"`
234
+
NameTrigram string `json:"name_trigram"`
235
+
Description string `json:"description"`
236
+
Website string `json:"website"`
237
+
Topics []string `json:"topics"`
238
+
TopicsExact []string `json:"topics_exact"`
239
+
Knot string `json:"knot"`
240
+
Language string `json:"language"`
241
+
}
242
+
243
+
func makeRepoData(repo *models.Repo) *repoData {
244
+
return &repoData{
245
+
ID: repo.Id,
246
+
RepoAt: repo.RepoAt().String(),
247
+
Did: repo.Did,
248
+
Name: repo.Name,
249
+
NameTrigram: repo.Name,
250
+
Description: repo.Description,
251
+
Website: repo.Website,
252
+
Topics: repo.Topics,
253
+
TopicsExact: repo.Topics,
254
+
Knot: repo.Knot,
255
+
Language: repo.RepoStats.Language,
256
+
}
257
+
}
258
+
259
+
// Type returns the document type, for bleve's mapping.Classifier interface.
260
+
func (r *repoData) Type() string {
261
+
return repoIndexerDocType
262
+
}
263
+
264
+
type SearchResult struct {
265
+
Hits []int64
266
+
Total uint64
267
+
}
268
+
269
+
const maxBatchSize = 20
270
+
271
+
func (ix *Indexer) Index(ctx context.Context, repos ...models.Repo) error {
272
+
batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
273
+
for _, repo := range repos {
274
+
repoData := makeRepoData(&repo)
275
+
if err := batch.Index(base36.Encode(repo.Id), repoData); err != nil {
276
+
return err
277
+
}
278
+
}
279
+
return batch.Flush()
280
+
}
281
+
282
+
func (ix *Indexer) Delete(ctx context.Context, repoID int64) error {
283
+
return ix.indexer.Delete(base36.Encode(repoID))
284
+
}
285
+
286
+
func (ix *Indexer) Search(ctx context.Context, opts models.RepoSearchOptions) (*SearchResult, error) {
287
+
var musts []query.Query
288
+
var mustNots []query.Query
289
+
290
+
for _, keyword := range opts.Keywords {
291
+
musts = append(musts, bleve.NewDisjunctionQuery(
292
+
bleveutil.MatchAndQuery("name", keyword, repoIndexerAnalyzer, 0),
293
+
bleveutil.MatchAndQuery("name_trigram", keyword, "trigram", 0),
294
+
bleveutil.MatchAndQuery("description", keyword, repoIndexerAnalyzer, 0),
295
+
bleveutil.MatchAndQuery("website", keyword, repoIndexerAnalyzer, 0),
296
+
bleveutil.MatchAndQuery("topics", keyword, repoIndexerAnalyzer, 0),
297
+
))
298
+
}
299
+
300
+
for _, phrase := range opts.Phrases {
301
+
musts = append(musts, bleve.NewDisjunctionQuery(
302
+
bleveutil.MatchPhraseQuery("name", phrase, repoIndexerAnalyzer),
303
+
bleveutil.MatchPhraseQuery("description", phrase, repoIndexerAnalyzer),
304
+
bleveutil.MatchPhraseQuery("website", phrase, repoIndexerAnalyzer),
305
+
bleveutil.MatchPhraseQuery("topics", phrase, repoIndexerAnalyzer),
306
+
))
307
+
}
308
+
309
+
for _, keyword := range opts.NegatedKeywords {
310
+
mustNots = append(mustNots, bleve.NewDisjunctionQuery(
311
+
bleveutil.MatchAndQuery("name", keyword, repoIndexerAnalyzer, 0),
312
+
bleveutil.MatchAndQuery("name_trigram", keyword, "trigram", 0),
313
+
bleveutil.MatchAndQuery("description", keyword, repoIndexerAnalyzer, 0),
314
+
bleveutil.MatchAndQuery("website", keyword, repoIndexerAnalyzer, 0),
315
+
bleveutil.MatchAndQuery("topics", keyword, repoIndexerAnalyzer, 0),
316
+
))
317
+
}
318
+
319
+
for _, phrase := range opts.NegatedPhrases {
320
+
mustNots = append(mustNots, bleve.NewDisjunctionQuery(
321
+
bleveutil.MatchPhraseQuery("name", phrase, repoIndexerAnalyzer),
322
+
bleveutil.MatchPhraseQuery("description", phrase, repoIndexerAnalyzer),
323
+
bleveutil.MatchPhraseQuery("website", phrase, repoIndexerAnalyzer),
324
+
bleveutil.MatchPhraseQuery("topics", phrase, repoIndexerAnalyzer),
325
+
))
326
+
}
327
+
328
+
// keyword filters
329
+
if opts.Language != "" {
330
+
musts = append(musts, bleveutil.MatchAndQuery("language", opts.Language, "keyword_lowercase", 0))
331
+
}
332
+
333
+
if opts.Knot != "" {
334
+
musts = append(musts, bleveutil.KeywordFieldQuery("knot", opts.Knot))
335
+
}
336
+
337
+
if opts.Did != "" {
338
+
musts = append(musts, bleveutil.KeywordFieldQuery("did", opts.Did))
339
+
}
340
+
341
+
for _, topic := range opts.Topics {
342
+
musts = append(musts, bleveutil.MatchAndQuery("topics_exact", topic, "keyword_lowercase", 0))
343
+
}
344
+
345
+
for _, topic := range opts.NegatedTopics {
346
+
mustNots = append(mustNots, bleveutil.MatchAndQuery("topics_exact", topic, "keyword_lowercase", 0))
347
+
}
348
+
349
+
indexerQuery := bleve.NewBooleanQuery()
350
+
if len(musts) == 0 {
351
+
musts = append(musts, bleve.NewMatchAllQuery())
352
+
}
353
+
indexerQuery.AddMust(musts...)
354
+
indexerQuery.AddMustNot(mustNots...)
355
+
searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
356
+
res, err := ix.indexer.SearchInContext(ctx, searchReq)
357
+
if err != nil {
358
+
return nil, nil
359
+
}
360
+
ret := &SearchResult{
361
+
Total: res.Total,
362
+
Hits: make([]int64, len(res.Hits)),
363
+
}
364
+
for i, hit := range res.Hits {
365
+
id, err := base36.Decode(hit.ID)
366
+
if err != nil {
367
+
return nil, err
368
+
}
369
+
ret.Hits[i] = id
370
+
}
371
+
return ret, nil
372
+
}
+639
appview/indexer/repos/indexer_test.go
+639
appview/indexer/repos/indexer_test.go
···
1
+
package repos_indexer
2
+
3
+
import (
4
+
"context"
5
+
"os"
6
+
"testing"
7
+
8
+
"github.com/blevesearch/bleve/v2"
9
+
"github.com/stretchr/testify/assert"
10
+
"github.com/stretchr/testify/require"
11
+
"tangled.org/core/appview/models"
12
+
"tangled.org/core/appview/pagination"
13
+
)
14
+
15
+
func setupTestIndexer(t *testing.T) (*Indexer, func()) {
16
+
t.Helper()
17
+
18
+
tmpDir, err := os.MkdirTemp("", "repo_indexer_test")
19
+
require.NoError(t, err)
20
+
21
+
ix := NewIndexer(tmpDir)
22
+
23
+
mapping, err := generateRepoIndexMapping()
24
+
require.NoError(t, err)
25
+
26
+
indexer, err := bleve.New(tmpDir, mapping)
27
+
require.NoError(t, err)
28
+
ix.indexer = indexer
29
+
30
+
cleanup := func() {
31
+
ix.indexer.Close()
32
+
os.RemoveAll(tmpDir)
33
+
}
34
+
35
+
return ix, cleanup
36
+
}
37
+
38
+
func TestBasicIndexingAndSearch(t *testing.T) {
39
+
ix, cleanup := setupTestIndexer(t)
40
+
defer cleanup()
41
+
42
+
ctx := context.Background()
43
+
44
+
err := ix.Index(ctx,
45
+
models.Repo{
46
+
Id: 1,
47
+
Did: "did:plc:alice",
48
+
Name: "web-framework",
49
+
Knot: "example.com",
50
+
Description: "A modern web framework for Go",
51
+
Website: "https://example.com/web-framework",
52
+
Topics: []string{"web", "framework", "golang"},
53
+
RepoStats: &models.RepoStats{Language: "Go"},
54
+
},
55
+
models.Repo{
56
+
Id: 2,
57
+
Did: "did:plc:bob",
58
+
Name: "cli-tool",
59
+
Knot: "example.com",
60
+
Description: "Command line utility for developers",
61
+
Website: "",
62
+
Topics: []string{"cli", "tool"},
63
+
RepoStats: &models.RepoStats{Language: "Rust"},
64
+
},
65
+
models.Repo{
66
+
Id: 3,
67
+
Did: "did:plc:alice",
68
+
Name: "javascript-parser",
69
+
Knot: "example.com",
70
+
Description: "Fast JavaScript parser",
71
+
Website: "",
72
+
Topics: []string{"javascript", "parser"},
73
+
RepoStats: &models.RepoStats{Language: "JavaScript"},
74
+
},
75
+
)
76
+
require.NoError(t, err)
77
+
78
+
// search by name
79
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
80
+
Keywords: []string{"framework"},
81
+
Page: pagination.Page{Limit: 10},
82
+
})
83
+
require.NoError(t, err)
84
+
assert.Equal(t, uint64(1), result.Total)
85
+
assert.Contains(t, result.Hits, int64(1))
86
+
87
+
// search by description
88
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
89
+
Keywords: []string{"utility"},
90
+
Page: pagination.Page{Limit: 10},
91
+
})
92
+
require.NoError(t, err)
93
+
assert.Equal(t, uint64(1), result.Total)
94
+
assert.Contains(t, result.Hits, int64(2))
95
+
96
+
// search by website
97
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
98
+
Keywords: []string{"example.com/web-framework"},
99
+
Page: pagination.Page{Limit: 10},
100
+
})
101
+
require.NoError(t, err)
102
+
assert.Equal(t, uint64(1), result.Total)
103
+
assert.Contains(t, result.Hits, int64(1))
104
+
}
105
+
106
+
func TestLanguageFiltering(t *testing.T) {
107
+
ix, cleanup := setupTestIndexer(t)
108
+
defer cleanup()
109
+
110
+
ctx := context.Background()
111
+
112
+
err := ix.Index(ctx,
113
+
models.Repo{
114
+
Id: 1,
115
+
Did: "did:plc:alice",
116
+
Name: "go-project",
117
+
RepoStats: &models.RepoStats{Language: "Go"},
118
+
},
119
+
models.Repo{
120
+
Id: 2,
121
+
Did: "did:plc:bob",
122
+
Name: "rust-project",
123
+
RepoStats: &models.RepoStats{Language: "Rust"},
124
+
},
125
+
models.Repo{
126
+
Id: 3,
127
+
Did: "did:plc:alice",
128
+
Name: "another-go-project",
129
+
RepoStats: &models.RepoStats{Language: "Go"},
130
+
},
131
+
)
132
+
require.NoError(t, err)
133
+
134
+
// filter by go language
135
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
136
+
Language: "Go",
137
+
Page: pagination.Page{Limit: 10},
138
+
})
139
+
require.NoError(t, err)
140
+
assert.Equal(t, uint64(2), result.Total)
141
+
assert.Contains(t, result.Hits, int64(1))
142
+
assert.Contains(t, result.Hits, int64(3))
143
+
144
+
// filter by rust language
145
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
146
+
Language: "Rust",
147
+
Page: pagination.Page{Limit: 10},
148
+
})
149
+
require.NoError(t, err)
150
+
assert.Equal(t, uint64(1), result.Total)
151
+
assert.Contains(t, result.Hits, int64(2))
152
+
}
153
+
154
+
func TestTopicExactMatching(t *testing.T) {
155
+
ix, cleanup := setupTestIndexer(t)
156
+
defer cleanup()
157
+
158
+
ctx := context.Background()
159
+
160
+
err := ix.Index(ctx,
161
+
models.Repo{
162
+
Id: 1,
163
+
Did: "did:plc:alice",
164
+
Name: "js-tool",
165
+
Topics: []string{"javascript", "tool"},
166
+
RepoStats: &models.RepoStats{},
167
+
},
168
+
models.Repo{
169
+
Id: 2,
170
+
Did: "did:plc:bob",
171
+
Name: "java-app",
172
+
Topics: []string{"java", "application"},
173
+
RepoStats: &models.RepoStats{},
174
+
},
175
+
models.Repo{
176
+
Id: 3,
177
+
Did: "did:plc:alice",
178
+
Name: "cli-tool",
179
+
Topics: []string{"cli", "tool"},
180
+
RepoStats: &models.RepoStats{},
181
+
},
182
+
)
183
+
require.NoError(t, err)
184
+
185
+
// exact match for "javascript" topic
186
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
187
+
Topics: []string{"javascript"},
188
+
Page: pagination.Page{Limit: 10},
189
+
})
190
+
require.NoError(t, err)
191
+
assert.Equal(t, uint64(1), result.Total)
192
+
assert.Contains(t, result.Hits, int64(1))
193
+
194
+
// exact match for "tool" topic (should match repos 1 and 3)
195
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
196
+
Topics: []string{"tool"},
197
+
Page: pagination.Page{Limit: 10},
198
+
})
199
+
require.NoError(t, err)
200
+
assert.Equal(t, uint64(2), result.Total)
201
+
assert.Contains(t, result.Hits, int64(1))
202
+
assert.Contains(t, result.Hits, int64(3))
203
+
}
204
+
205
+
func TestTopicTextSearch(t *testing.T) {
206
+
ix, cleanup := setupTestIndexer(t)
207
+
defer cleanup()
208
+
209
+
ctx := context.Background()
210
+
211
+
err := ix.Index(ctx,
212
+
models.Repo{
213
+
Id: 1,
214
+
Did: "did:plc:alice",
215
+
Name: "js-tool",
216
+
Topics: []string{"JavaScript"},
217
+
RepoStats: &models.RepoStats{},
218
+
},
219
+
models.Repo{
220
+
Id: 2,
221
+
Did: "did:plc:bob",
222
+
Name: "java-app",
223
+
Topics: []string{"Java"},
224
+
RepoStats: &models.RepoStats{},
225
+
},
226
+
)
227
+
require.NoError(t, err)
228
+
229
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
230
+
Keywords: []string{"Java"},
231
+
Page: pagination.Page{Limit: 10},
232
+
})
233
+
require.NoError(t, err)
234
+
assert.Equal(t, uint64(2), result.Total)
235
+
assert.Contains(t, result.Hits, int64(1))
236
+
assert.Contains(t, result.Hits, int64(2))
237
+
}
238
+
239
+
func TestNegatedFilters(t *testing.T) {
240
+
ix, cleanup := setupTestIndexer(t)
241
+
defer cleanup()
242
+
243
+
ctx := context.Background()
244
+
245
+
err := ix.Index(ctx,
246
+
models.Repo{
247
+
Id: 1,
248
+
Did: "did:plc:alice",
249
+
Name: "active-project",
250
+
Description: "An active development project",
251
+
Topics: []string{"active"},
252
+
RepoStats: &models.RepoStats{Language: "Go"},
253
+
},
254
+
models.Repo{
255
+
Id: 2,
256
+
Did: "did:plc:bob",
257
+
Name: "archived-project",
258
+
Description: "An archived project",
259
+
Topics: []string{"archived"},
260
+
RepoStats: &models.RepoStats{Language: "Python"},
261
+
},
262
+
models.Repo{
263
+
Id: 3,
264
+
Did: "did:plc:alice",
265
+
Name: "another-project",
266
+
Description: "Another active project",
267
+
Topics: []string{"active"},
268
+
RepoStats: &models.RepoStats{Language: "Go"},
269
+
},
270
+
)
271
+
require.NoError(t, err)
272
+
273
+
// exclude archived topic
274
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
275
+
NegatedTopics: []string{"archived"},
276
+
Page: pagination.Page{Limit: 10},
277
+
})
278
+
require.NoError(t, err)
279
+
assert.Equal(t, uint64(2), result.Total)
280
+
assert.Contains(t, result.Hits, int64(1))
281
+
assert.Contains(t, result.Hits, int64(3))
282
+
283
+
// exclude keyword "archived"
284
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
285
+
NegatedKeywords: []string{"archived"},
286
+
Page: pagination.Page{Limit: 10},
287
+
})
288
+
require.NoError(t, err)
289
+
assert.Equal(t, uint64(2), result.Total)
290
+
assert.Contains(t, result.Hits, int64(1))
291
+
assert.Contains(t, result.Hits, int64(3))
292
+
293
+
// exclude phrase
294
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
295
+
NegatedPhrases: []string{"archived project"},
296
+
Page: pagination.Page{Limit: 10},
297
+
})
298
+
require.NoError(t, err)
299
+
assert.Equal(t, uint64(2), result.Total)
300
+
assert.Contains(t, result.Hits, int64(1))
301
+
assert.Contains(t, result.Hits, int64(3))
302
+
}
303
+
304
+
func TestPagination(t *testing.T) {
305
+
ix, cleanup := setupTestIndexer(t)
306
+
defer cleanup()
307
+
308
+
ctx := context.Background()
309
+
310
+
// index multiple repos
311
+
var repos []models.Repo
312
+
for i := 1; i <= 25; i++ {
313
+
repos = append(repos, models.Repo{
314
+
Id: int64(i),
315
+
Did: "did:plc:alice",
316
+
Name: "project",
317
+
Topics: []string{"test"},
318
+
RepoStats: &models.RepoStats{},
319
+
})
320
+
}
321
+
err := ix.Index(ctx, repos...)
322
+
require.NoError(t, err)
323
+
324
+
// first page
325
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
326
+
Topics: []string{"test"},
327
+
Page: pagination.Page{Limit: 10, Offset: 0},
328
+
})
329
+
require.NoError(t, err)
330
+
assert.Equal(t, uint64(25), result.Total)
331
+
assert.Len(t, result.Hits, 10)
332
+
333
+
// second page
334
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
335
+
Topics: []string{"test"},
336
+
Page: pagination.Page{Limit: 10, Offset: 10},
337
+
})
338
+
require.NoError(t, err)
339
+
assert.Equal(t, uint64(25), result.Total)
340
+
assert.Len(t, result.Hits, 10)
341
+
342
+
// third page - 5 items
343
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
344
+
Topics: []string{"test"},
345
+
Page: pagination.Page{Limit: 10, Offset: 20},
346
+
})
347
+
require.NoError(t, err)
348
+
assert.Equal(t, uint64(25), result.Total)
349
+
assert.Len(t, result.Hits, 5)
350
+
}
351
+
352
+
func TestUpdateReindex(t *testing.T) {
353
+
ix, cleanup := setupTestIndexer(t)
354
+
defer cleanup()
355
+
356
+
ctx := context.Background()
357
+
358
+
// initial index
359
+
err := ix.Index(ctx, models.Repo{
360
+
Id: 1,
361
+
Did: "did:plc:alice",
362
+
Name: "my-project",
363
+
Description: "Initial description",
364
+
Topics: []string{"initial"},
365
+
RepoStats: &models.RepoStats{Language: "Go"},
366
+
})
367
+
require.NoError(t, err)
368
+
369
+
// search for initial state
370
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
371
+
Keywords: []string{"Initial"},
372
+
Page: pagination.Page{Limit: 10},
373
+
})
374
+
require.NoError(t, err)
375
+
assert.Equal(t, uint64(1), result.Total)
376
+
377
+
// update the repo
378
+
err = ix.Index(ctx, models.Repo{
379
+
Id: 1,
380
+
Did: "did:plc:alice",
381
+
Name: "my-project",
382
+
Description: "Updated description",
383
+
Topics: []string{"updated"},
384
+
RepoStats: &models.RepoStats{Language: "Rust"},
385
+
})
386
+
require.NoError(t, err)
387
+
388
+
// search for old description should return nothing
389
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
390
+
Keywords: []string{"Initial"},
391
+
Page: pagination.Page{Limit: 10},
392
+
})
393
+
require.NoError(t, err)
394
+
assert.Equal(t, uint64(0), result.Total)
395
+
396
+
// search for new description should work
397
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
398
+
Keywords: []string{"Updated"},
399
+
Page: pagination.Page{Limit: 10},
400
+
})
401
+
require.NoError(t, err)
402
+
assert.Equal(t, uint64(1), result.Total)
403
+
404
+
// language should be updated
405
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
406
+
Language: "Rust",
407
+
Page: pagination.Page{Limit: 10},
408
+
})
409
+
require.NoError(t, err)
410
+
assert.Equal(t, uint64(1), result.Total)
411
+
}
412
+
413
+
func TestEmptyResults(t *testing.T) {
414
+
ix, cleanup := setupTestIndexer(t)
415
+
defer cleanup()
416
+
417
+
ctx := context.Background()
418
+
419
+
err := ix.Index(ctx, models.Repo{
420
+
Id: 1,
421
+
Did: "did:plc:alice",
422
+
Name: "my-project",
423
+
RepoStats: &models.RepoStats{},
424
+
})
425
+
require.NoError(t, err)
426
+
427
+
// search for non-existent keyword
428
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
429
+
Keywords: []string{"nonexistent"},
430
+
Page: pagination.Page{Limit: 10},
431
+
})
432
+
require.NoError(t, err)
433
+
assert.Equal(t, uint64(0), result.Total)
434
+
assert.Empty(t, result.Hits)
435
+
436
+
// search for non-existent language
437
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
438
+
Language: "NonexistentLanguage",
439
+
Page: pagination.Page{Limit: 10},
440
+
})
441
+
require.NoError(t, err)
442
+
assert.Equal(t, uint64(0), result.Total)
443
+
assert.Empty(t, result.Hits)
444
+
}
445
+
446
+
func TestCombinedFilters(t *testing.T) {
447
+
ix, cleanup := setupTestIndexer(t)
448
+
defer cleanup()
449
+
450
+
ctx := context.Background()
451
+
452
+
err := ix.Index(ctx,
453
+
models.Repo{
454
+
Id: 1,
455
+
Did: "did:plc:alice",
456
+
Name: "web-server",
457
+
Knot: "example.com",
458
+
Description: "A web server in Go",
459
+
Topics: []string{"web", "server"},
460
+
RepoStats: &models.RepoStats{Language: "Go"},
461
+
},
462
+
models.Repo{
463
+
Id: 2,
464
+
Did: "did:plc:bob",
465
+
Name: "web-client",
466
+
Knot: "example.org",
467
+
Description: "A web client in Rust",
468
+
Topics: []string{"web", "client"},
469
+
RepoStats: &models.RepoStats{Language: "Rust"},
470
+
},
471
+
models.Repo{
472
+
Id: 3,
473
+
Did: "did:plc:alice",
474
+
Name: "cli-tool",
475
+
Knot: "example.com",
476
+
Description: "A CLI tool in Go",
477
+
Topics: []string{"cli", "tool"},
478
+
RepoStats: &models.RepoStats{Language: "Go"},
479
+
},
480
+
)
481
+
require.NoError(t, err)
482
+
483
+
// combine language + topic + keyword
484
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
485
+
Language: "Go",
486
+
Topics: []string{"web"},
487
+
Keywords: []string{"server"},
488
+
Page: pagination.Page{Limit: 10},
489
+
})
490
+
require.NoError(t, err)
491
+
assert.Equal(t, uint64(1), result.Total)
492
+
assert.Contains(t, result.Hits, int64(1))
493
+
494
+
// combine did + language
495
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
496
+
Did: "did:plc:alice",
497
+
Language: "Go",
498
+
Page: pagination.Page{Limit: 10},
499
+
})
500
+
require.NoError(t, err)
501
+
assert.Equal(t, uint64(2), result.Total)
502
+
assert.Contains(t, result.Hits, int64(1))
503
+
assert.Contains(t, result.Hits, int64(3))
504
+
505
+
// combine knot + language
506
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
507
+
Knot: "example.com",
508
+
Language: "Go",
509
+
Page: pagination.Page{Limit: 10},
510
+
})
511
+
require.NoError(t, err)
512
+
assert.Equal(t, uint64(2), result.Total)
513
+
assert.Contains(t, result.Hits, int64(1))
514
+
assert.Contains(t, result.Hits, int64(3))
515
+
}
516
+
517
+
func TestRepoWithoutLanguage(t *testing.T) {
518
+
ix, cleanup := setupTestIndexer(t)
519
+
defer cleanup()
520
+
521
+
ctx := context.Background()
522
+
523
+
err := ix.Index(ctx,
524
+
models.Repo{
525
+
Id: 1,
526
+
Did: "did:plc:alice",
527
+
Name: "project-with-language",
528
+
RepoStats: &models.RepoStats{Language: "Go"},
529
+
},
530
+
models.Repo{
531
+
Id: 2,
532
+
Did: "did:plc:bob",
533
+
Name: "project-without-language",
534
+
RepoStats: &models.RepoStats{Language: ""},
535
+
},
536
+
)
537
+
require.NoError(t, err)
538
+
539
+
// search without language filter should return both
540
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
541
+
Keywords: []string{"project"},
542
+
Page: pagination.Page{Limit: 10},
543
+
})
544
+
require.NoError(t, err)
545
+
assert.Equal(t, uint64(2), result.Total)
546
+
547
+
// language filter should only return repo with language
548
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
549
+
Language: "Go",
550
+
Page: pagination.Page{Limit: 10},
551
+
})
552
+
require.NoError(t, err)
553
+
assert.Equal(t, uint64(1), result.Total)
554
+
assert.Contains(t, result.Hits, int64(1))
555
+
}
556
+
557
+
func TestRepoWithoutTopics(t *testing.T) {
558
+
ix, cleanup := setupTestIndexer(t)
559
+
defer cleanup()
560
+
561
+
ctx := context.Background()
562
+
563
+
err := ix.Index(ctx,
564
+
models.Repo{
565
+
Id: 1,
566
+
Did: "did:plc:alice",
567
+
Name: "project-with-topics",
568
+
Topics: []string{"cli", "tool"},
569
+
RepoStats: &models.RepoStats{},
570
+
},
571
+
models.Repo{
572
+
Id: 2,
573
+
Did: "did:plc:bob",
574
+
Name: "project-without-topics",
575
+
Topics: []string{},
576
+
RepoStats: &models.RepoStats{},
577
+
},
578
+
)
579
+
require.NoError(t, err)
580
+
581
+
// topic filter should only return repo with topics
582
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
583
+
Topics: []string{"cli"},
584
+
Page: pagination.Page{Limit: 10},
585
+
})
586
+
require.NoError(t, err)
587
+
assert.Equal(t, uint64(1), result.Total)
588
+
assert.Contains(t, result.Hits, int64(1))
589
+
590
+
// general search should return both
591
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
592
+
Keywords: []string{"project"},
593
+
Page: pagination.Page{Limit: 10},
594
+
})
595
+
require.NoError(t, err)
596
+
assert.Equal(t, uint64(2), result.Total)
597
+
}
598
+
599
+
func TestDelete(t *testing.T) {
600
+
ix, cleanup := setupTestIndexer(t)
601
+
defer cleanup()
602
+
603
+
ctx := context.Background()
604
+
605
+
err := ix.Index(ctx,
606
+
models.Repo{
607
+
Id: 1,
608
+
Did: "did:plc:alice",
609
+
Name: "to-delete",
610
+
RepoStats: &models.RepoStats{},
611
+
},
612
+
models.Repo{
613
+
Id: 2,
614
+
Did: "did:plc:bob",
615
+
Name: "to-keep",
616
+
RepoStats: &models.RepoStats{},
617
+
},
618
+
)
619
+
require.NoError(t, err)
620
+
621
+
// verify both exist
622
+
result, err := ix.Search(ctx, models.RepoSearchOptions{
623
+
Page: pagination.Page{Limit: 10},
624
+
})
625
+
require.NoError(t, err)
626
+
assert.Equal(t, uint64(2), result.Total)
627
+
628
+
// delete repo 1
629
+
err = ix.Delete(ctx, 1)
630
+
require.NoError(t, err)
631
+
632
+
// verify only one remains
633
+
result, err = ix.Search(ctx, models.RepoSearchOptions{
634
+
Page: pagination.Page{Limit: 10},
635
+
})
636
+
require.NoError(t, err)
637
+
assert.Equal(t, uint64(1), result.Total)
638
+
assert.Contains(t, result.Hits, int64(2))
639
+
}
+23
appview/models/search.go
+23
appview/models/search.go
···
53
53
len(o.LabelValues) > 0 || len(o.NegatedLabelValues) > 0 ||
54
54
len(o.NegatedKeywords) > 0 || len(o.NegatedPhrases) > 0
55
55
}
56
+
57
+
type RepoSearchOptions struct {
58
+
Keywords []string // text search across name, description, website, topics
59
+
Phrases []string // phrase search
60
+
61
+
Language string // exact match on primary language
62
+
Knot string // filter by knot domain
63
+
Did string // filter by owner DID
64
+
Topics []string // exact topic matches
65
+
66
+
NegatedKeywords []string
67
+
NegatedPhrases []string
68
+
NegatedTopics []string
69
+
70
+
Page pagination.Page
71
+
}
72
+
73
+
func (o *RepoSearchOptions) HasSearchFilters() bool {
74
+
return len(o.Keywords) > 0 || len(o.Phrases) > 0 ||
75
+
o.Language != "" || o.Did != "" ||
76
+
len(o.Topics) > 0 || len(o.NegatedTopics) > 0 ||
77
+
len(o.NegatedKeywords) > 0 || len(o.NegatedPhrases) > 0
78
+
}
History
1 round
0 comments
oppi.li
submitted
#0
1 commit
expand
collapse
appview/indexer: add indexer for repos
Signed-off-by: oppiliappan <me@oppi.li>
2/3 failed, 1/3 success
expand
collapse
expand 0 comments
pull request successfully merged