Signed-off-by: Seongmin Lee boltlessengineer@proton.me
+88
-4
appview/indexer/issues/indexer.go
+88
-4
appview/indexer/issues/indexer.go
···
1
package issues_indexer
2
3
import (
···
6
"os"
7
8
"github.com/blevesearch/bleve/v2"
9
"github.com/blevesearch/bleve/v2/index/upsidedown"
10
"github.com/blevesearch/bleve/v2/search/query"
11
"tangled.org/core/appview/db"
12
"tangled.org/core/appview/indexer/base36"
···
16
tlog "tangled.org/core/log"
17
)
18
19
type Indexer struct {
20
indexer bleve.Index
21
path string
···
44
l.Info("Initialized the issue indexer")
45
}
46
47
func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
48
if ix.indexer != nil {
49
return false, errors.New("indexer is already initialized")
···
58
return true, nil
59
}
60
61
-
mapping := bleve.NewIndexMapping()
62
indexer, err = bleve.New(ix.path, mapping)
63
if err != nil {
64
return false, err
···
94
for _, issue := range issues {
95
dataList = append(dataList, &IssueData{
96
ID: issue.Id,
97
IssueID: issue.IssueId,
98
Title: issue.Title,
99
Body: issue.Body,
···
114
// IssueData is the data that is stored and indexed for each issue.
115
type IssueData struct {
116
ID int64 `json:"id"`
117
IssueID int `json:"issue_id"`
118
Title string `json:"title"`
119
Body string `json:"body"`
···
122
Comments []IssueCommentData `json:"comments"`
123
}
124
125
// IssueCommentData is the indexed representation of a single issue comment;
// it is the element type of IssueData.Comments.
type IssueCommentData struct {
126
// Body is the comment text, serialized under the "body" JSON key.
Body string `json:"body"`
127
}
···
149
150
if opts.Keyword != "" {
151
queries = append(queries, bleve.NewDisjunctionQuery(
152
-
matchAndQuery(opts.Keyword, "title"),
153
-
matchAndQuery(opts.Keyword, "body"),
154
))
155
}
156
queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open"))
157
// TODO: append more queries
158
···
176
return ret, nil
177
}
178
179
-
// matchAndQuery builds a match query for keyword restricted to field.
//
// The operator is set to AND, so a document must contain every term of
// keyword in field to match, as the function name promises; a bleve match
// query otherwise defaults to OR.
func matchAndQuery(keyword, field string) query.Query {
	q := bleve.NewMatchQuery(keyword)
	q.FieldVal = field
	q.Operator = query.MatchQueryOperatorAnd
	return q
}
184
···
187
q.FieldVal = field
188
return q
189
}
···
1
+
// heavily inspired by gitea's model (basically copy-pasted)
2
package issues_indexer
3
4
import (
···
7
"os"
8
9
"github.com/blevesearch/bleve/v2"
10
+
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
11
+
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
12
+
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
13
+
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
14
+
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
15
"github.com/blevesearch/bleve/v2/index/upsidedown"
16
+
"github.com/blevesearch/bleve/v2/mapping"
17
"github.com/blevesearch/bleve/v2/search/query"
18
"tangled.org/core/appview/db"
19
"tangled.org/core/appview/indexer/base36"
···
23
tlog "tangled.org/core/log"
24
)
25
26
+
// Names under which the issue index registers its bleve components.
const (
27
+
// issueIndexerAnalyzer is the name of the custom analyzer registered by
// generateIssueIndexMapping and passed to the keyword match queries.
issueIndexerAnalyzer = "issueIndexer"
28
+
// issueIndexerDocType is the document type returned by IssueData.Type and
// the type the issue document mapping is registered under.
issueIndexerDocType = "issueIndexerDocType"
29
+
30
+
// unicodeNormalizeName is the name of the custom unicode-normalization
// token filter registered by generateIssueIndexMapping.
// NOTE(review): the value is spelled "uicodeNormalize" (missing "n"); it is
// only referenced through this constant, so it looks harmless — confirm
// before renaming.
unicodeNormalizeName = "uicodeNormalize"
31
+
)
32
+
33
type Indexer struct {
34
indexer bleve.Index
35
path string
···
58
l.Info("Initialized the issue indexer")
59
}
60
61
+
func generateIssueIndexMapping() (mapping.IndexMapping, error) {
62
+
mapping := bleve.NewIndexMapping()
63
+
docMapping := bleve.NewDocumentMapping()
64
+
65
+
textFieldMapping := bleve.NewTextFieldMapping()
66
+
textFieldMapping.Store = false
67
+
textFieldMapping.IncludeInAll = false
68
+
69
+
boolFieldMapping := bleve.NewBooleanFieldMapping()
70
+
boolFieldMapping.Store = false
71
+
boolFieldMapping.IncludeInAll = false
72
+
73
+
keywordFieldMapping := bleve.NewKeywordFieldMapping()
74
+
keywordFieldMapping.Store = false
75
+
keywordFieldMapping.IncludeInAll = false
76
+
77
+
// numericFieldMapping := bleve.NewNumericFieldMapping()
78
+
79
+
docMapping.AddFieldMappingsAt("title", textFieldMapping)
80
+
docMapping.AddFieldMappingsAt("body", textFieldMapping)
81
+
82
+
docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
83
+
docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
84
+
85
+
err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
86
+
"type": unicodenorm.Name,
87
+
"form": unicodenorm.NFC,
88
+
})
89
+
if err != nil {
90
+
return nil, err
91
+
}
92
+
93
+
err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
94
+
"type": custom.Name,
95
+
"char_filters": []string{},
96
+
"tokenizer": unicode.Name,
97
+
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
98
+
})
99
+
if err != nil {
100
+
return nil, err
101
+
}
102
+
103
+
mapping.DefaultAnalyzer = issueIndexerAnalyzer
104
+
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
105
+
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
106
+
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
107
+
108
+
return mapping, nil
109
+
}
110
+
111
func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
112
if ix.indexer != nil {
113
return false, errors.New("indexer is already initialized")
···
122
return true, nil
123
}
124
125
+
mapping, err := generateIssueIndexMapping()
126
+
if err != nil {
127
+
return false, err
128
+
}
129
indexer, err = bleve.New(ix.path, mapping)
130
if err != nil {
131
return false, err
···
161
for _, issue := range issues {
162
dataList = append(dataList, &IssueData{
163
ID: issue.Id,
164
+
RepoAt: issue.RepoAt.String(),
165
IssueID: issue.IssueId,
166
Title: issue.Title,
167
Body: issue.Body,
···
182
// IssueData is the data that is stored and indexed for each issue.
183
type IssueData struct {
184
ID int64 `json:"id"`
185
+
RepoAt string `json:"repo_at"`
186
IssueID int `json:"issue_id"`
187
Title string `json:"title"`
188
Body string `json:"body"`
···
191
Comments []IssueCommentData `json:"comments"`
192
}
193
194
+
// Type returns the document type, for bleve's mapping.Classifier interface.
195
+
func (i *IssueData) Type() string {
196
+
return issueIndexerDocType
197
+
}
198
+
199
+
200
// IssueCommentData is the indexed representation of a single issue comment;
// it is the element type of IssueData.Comments.
type IssueCommentData struct {
201
// Body is the comment text, serialized under the "body" JSON key.
Body string `json:"body"`
202
}
···
224
225
if opts.Keyword != "" {
226
queries = append(queries, bleve.NewDisjunctionQuery(
227
+
matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0),
228
+
matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0),
229
))
230
}
231
+
queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at"))
232
queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open"))
233
// TODO: append more queries
234
···
252
return ret, nil
253
}
254
255
+
// matchAndQuery builds a match query for keyword restricted to field.
//
// keyword is analyzed with the analyzer registered under the given name, and
// fuzziness is passed through to the query's Fuzziness setting. The operator
// is set to AND, so a document must contain every term of keyword in field to
// match, as the function name promises; a bleve match query otherwise
// defaults to OR.
func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query {
	q := bleve.NewMatchQuery(keyword)
	q.FieldVal = field
	q.Analyzer = analyzer
	q.Fuzziness = fuzziness
	q.Operator = query.MatchQueryOperatorAnd
	return q
}
262
···
265
q.FieldVal = field
266
return q
267
}
268
+
269
+
func keywordFieldQuery(keyword, field string) query.Query {
270
+
q := bleve.NewTermQuery(keyword)
271
+
q.FieldVal = field
272
+
return q
273
+
}