appview: indexer: add indexer mappings #673

closed
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+88 -4
appview
indexer
issues
+88 -4
appview/indexer/issues/indexer.go
··· 1 + // heavily inspired by gitea's model (basically copy-pasted) 1 2 package issues_indexer 2 3 3 4 import ( ··· 6 7 "os" 7 8 8 9 "github.com/blevesearch/bleve/v2" 10 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 11 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 12 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 14 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 9 15 "github.com/blevesearch/bleve/v2/index/upsidedown" 16 + "github.com/blevesearch/bleve/v2/mapping" 10 17 "github.com/blevesearch/bleve/v2/search/query" 11 18 "tangled.org/core/appview/db" 12 19 "tangled.org/core/appview/indexer/base36" ··· 16 23 tlog "tangled.org/core/log" 17 24 ) 18 25 26 + const ( 27 + issueIndexerAnalyzer = "issueIndexer" 28 + issueIndexerDocType = "issueIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 19 33 type Indexer struct { 20 34 indexer bleve.Index 21 35 path string ··· 44 58 l.Info("Initialized the issue indexer") 45 59 } 46 60 61 + func generateIssueIndexMapping() (mapping.IndexMapping, error) { 62 + mapping := bleve.NewIndexMapping() 63 + docMapping := bleve.NewDocumentMapping() 64 + 65 + textFieldMapping := bleve.NewTextFieldMapping() 66 + textFieldMapping.Store = false 67 + textFieldMapping.IncludeInAll = false 68 + 69 + boolFieldMapping := bleve.NewBooleanFieldMapping() 70 + boolFieldMapping.Store = false 71 + boolFieldMapping.IncludeInAll = false 72 + 73 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 74 + keywordFieldMapping.Store = false 75 + keywordFieldMapping.IncludeInAll = false 76 + 77 + // numericFieldMapping := bleve.NewNumericFieldMapping() 78 + 79 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 80 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 81 + 82 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 83 + docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 84 + 85 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 86 + "type": unicodenorm.Name, 87 + "form": unicodenorm.NFC, 88 + }) 89 + if err != nil { 90 + return nil, err 91 + } 92 + 93 + err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 94 + "type": custom.Name, 95 + "char_filters": []string{}, 96 + "tokenizer": unicode.Name, 97 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 98 + }) 99 + if err != nil { 100 + return nil, err 101 + } 102 + 103 + mapping.DefaultAnalyzer = issueIndexerAnalyzer 104 + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 105 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 106 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 107 + 108 + return mapping, nil 109 + } 110 + 47 111 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 48 112 if ix.indexer != nil { 49 113 return false, errors.New("indexer is already initialized") ··· 58 122 return true, nil 59 123 } 60 124 61 - mapping := bleve.NewIndexMapping() 125 + mapping, err := generateIssueIndexMapping() 126 + if err != nil { 127 + return false, err 128 + } 62 129 indexer, err = bleve.New(ix.path, mapping) 63 130 if err != nil { 64 131 return false, err ··· 94 161 for _, issue := range issues { 95 162 dataList = append(dataList, &IssueData{ 96 163 ID: issue.Id, 164 + RepoAt: issue.RepoAt.String(), 97 165 IssueID: issue.IssueId, 98 166 Title: issue.Title, 99 167 Body: issue.Body, ··· 114 182 // IssueData data stored and will be indexed 115 183 type IssueData struct { 116 184 ID int64 `json:"id"` 185 + RepoAt string `json:"repo_at"` 117 186 IssueID int `json:"issue_id"` 118 187 Title string `json:"title"` 119 188 Body string `json:"body"` ··· 122 191 Comments []IssueCommentData `json:"comments"` 123 192 } 124 193 194 + // Type returns the document type, for bleve's mapping.Classifier interface. 195 + func (i *IssueData) Type() string { 196 + return issueIndexerDocType 197 + } 198 + 199 + 125 200 type IssueCommentData struct { 126 201 Body string `json:"body"` 127 202 } ··· 149 224 150 225 if opts.Keyword != "" { 151 226 queries = append(queries, bleve.NewDisjunctionQuery( 152 - matchAndQuery(opts.Keyword, "title"), 153 - matchAndQuery(opts.Keyword, "body"), 227 + matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0), 228 + matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0), 154 229 )) 155 230 } 231 + queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) 156 232 queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open")) 157 233 // TODO: append more queries 158 234 ··· 176 252 return ret, nil 177 253 } 178 254 179 - func matchAndQuery(keyword, field string) query.Query { 255 + func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query { 180 256 q := bleve.NewMatchQuery(keyword) 181 257 q.FieldVal = field 258 + q.Analyzer = analyzer 259 + q.Fuzziness = fuzziness 182 260 return q 183 261 } 184 262 ··· 187 265 q.FieldVal = field 188 266 return q 189 267 } 268 + 269 + func keywordFieldQuery(keyword, field string) query.Query { 270 + q := bleve.NewTermQuery(keyword) 271 + q.FieldVal = field 272 + return q 273 + }