wip: add indexer mappings #495

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+90 -4
appview
db
indexer
issues
+1
appview/db/issues.go
··· 434 434 return nil, nil, err 435 435 } 436 436 437 + issue.RepoAt = repoAt 437 438 createdTime, err := time.Parse(time.RFC3339, createdAt) 438 439 if err != nil { 439 440 return nil, nil, err
+89 -4
appview/indexer/issues/indexer.go
··· 1 + // heavily inspired by gitea's model (basically copy-pasted) 1 2 package issues_indexer 2 3 3 4 import ( ··· 7 8 "os" 8 9 9 10 "github.com/blevesearch/bleve/v2" 11 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 10 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 + "github.com/blevesearch/bleve/v2/mapping" 11 18 "github.com/blevesearch/bleve/v2/search/query" 12 19 "tangled.sh/tangled.sh/core/appview/db" 13 20 "tangled.sh/tangled.sh/core/appview/indexer/base36" ··· 16 23 "tangled.sh/tangled.sh/core/appview/pagination" 17 24 ) 18 25 26 + const ( 27 + issueIndexerAnalyzer = "issueIndexer" 28 + issueIndexerDocType = "issueIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 19 33 type Indexer struct { 20 34 indexer bleve.Index 21 35 path string ··· 43 57 log.Println("Initialized the issue indexer") 44 58 } 45 59 60 + func generateIssueIndexMapping() (mapping.IndexMapping, error) { 61 + mapping := bleve.NewIndexMapping() 62 + docMapping := bleve.NewDocumentMapping() 63 + 64 + textFieldMapping := bleve.NewTextFieldMapping() 65 + textFieldMapping.Store = false 66 + textFieldMapping.IncludeInAll = false 67 + 68 + boolFieldMapping := bleve.NewBooleanFieldMapping() 69 + boolFieldMapping.Store = false 70 + boolFieldMapping.IncludeInAll = false 71 + 72 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 73 + keywordFieldMapping.Store = false 74 + keywordFieldMapping.IncludeInAll = false 75 + 76 + // numericFieldMapping := bleve.NewNumericFieldMapping() 77 + 78 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 79 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 80 + 81 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 82 + docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 83 + 84 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 85 + "type": unicodenorm.Name, 86 + "form": unicodenorm.NFC, 87 + }) 88 + if err != nil { 89 + return nil, err 90 + } 91 + 92 + err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 93 + "type": custom.Name, 94 + "char_filters": []string{}, 95 + "tokenizer": unicode.Name, 96 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 97 + }) 98 + if err != nil { 99 + return nil, err 100 + } 101 + 102 + mapping.DefaultAnalyzer = issueIndexerAnalyzer 103 + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 104 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 105 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 106 + 107 + return mapping, nil 108 + } 109 + 46 110 func (ix *Indexer) intialize(_ context.Context) (bool, error) { 47 111 if ix.indexer != nil { 48 112 return false, errors.New("indexer is already initialized") ··· 57 121 return true, nil 58 122 } 59 123 60 - mapping := bleve.NewIndexMapping() 124 + mapping, err := generateIssueIndexMapping() 125 + if err != nil { 126 + return false, err 127 + } 61 128 indexer, err = bleve.New(ix.path, mapping) 62 129 if err != nil { 63 130 return false, err ··· 96 163 } 97 164 dataList = append(dataList, &IssueData{ 98 165 ID: issue.ID, 166 + RepoAt: issue.RepoAt.String(), 99 167 IssueID: issue.IssueId, 100 168 Title: issue.Title, 101 169 Body: issue.Body, 102 170 IsOpen: issue.Open, 103 171 }) 172 + log.Println(dataList[len(dataList)-1]) 104 173 } 105 174 err = ix.Index(ctx, dataList...) 106 175 if err != nil { ··· 117 186 // IssueData data stored and will be indexed 118 187 type IssueData struct { 119 188 ID int64 `json:"id"` 189 + RepoAt string `json:"repo_at"` 120 190 IssueID int `json:"issue_id"` 121 191 Title string `json:"title"` 122 192 Body string `json:"body"` ··· 125 195 Comments []IssueCommentData `json:"comments"` 126 196 } 127 197 198 + // Type returns the document type, for bleve's mapping.Classifier interface. 199 + func (i *IssueData) Type() string { 200 + return issueIndexerDocType 201 + } 202 + 203 + 128 204 type IssueCommentData struct { 129 205 Body string `json:"body"` 130 206 } ··· 152 228 153 229 if opts.Keyword != "" { 154 230 queries = append(queries, bleve.NewDisjunctionQuery( 155 - matchAndQuery(opts.Keyword, "title"), 156 - matchAndQuery(opts.Keyword, "body"), 231 + matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0), 232 + matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0), 157 233 )) 158 234 } 235 + queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) 159 236 queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open")) 160 237 // TODO: append more queries 161 238 ··· 179 256 return ret, nil 180 257 } 181 258 182 - func matchAndQuery(keyword, field string) query.Query { 259 + func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query { 183 260 q := bleve.NewMatchQuery(keyword) 184 261 q.FieldVal = field 262 + q.Analyzer = analyzer 263 + q.Fuzziness = fuzziness 185 264 return q 186 265 } 187 266 ··· 190 269 q.FieldVal = field 191 270 return q 192 271 } 272 + 273 + func keywordFieldQuery(keyword, field string) query.Query { 274 + q := bleve.NewTermQuery(keyword) 275 + q.FieldVal = field 276 + return q 277 + }