wip: add indexer mappings #495

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+78 -4
appview
indexer
issues
+78 -4
appview/indexer/issues/indexer.go
··· 7 7 "os" 8 8 9 9 "github.com/blevesearch/bleve/v2" 10 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 11 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 12 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 14 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 10 15 "github.com/blevesearch/bleve/v2/index/upsidedown" 16 + "github.com/blevesearch/bleve/v2/mapping" 11 17 "github.com/blevesearch/bleve/v2/search/query" 12 18 "tangled.org/core/appview/db" 13 19 "tangled.org/core/appview/indexer/base36" ··· 17 23 tlog "tangled.org/core/log" 18 24 ) 19 25 26 + const ( 27 + issueIndexerAnalyzer = "issueIndexer" 28 + issueIndexerDocType = "issueIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 20 33 type Indexer struct { 21 34 indexer bleve.Index 22 35 path string ··· 45 58 l.Info("Initialized the issue indexer") 46 59 } 47 60 61 + func generateIssueIndexMapping() (mapping.IndexMapping, error) { 62 + mapping := bleve.NewIndexMapping() 63 + docMapping := bleve.NewDocumentMapping() 64 + 65 + textFieldMapping := bleve.NewTextFieldMapping() 66 + textFieldMapping.Store = false 67 + textFieldMapping.IncludeInAll = false 68 + 69 + boolFieldMapping := bleve.NewBooleanFieldMapping() 70 + boolFieldMapping.Store = false 71 + boolFieldMapping.IncludeInAll = false 72 + 73 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 74 + keywordFieldMapping.Store = false 75 + keywordFieldMapping.IncludeInAll = false 76 + 77 + // numericFieldMapping := bleve.NewNumericFieldMapping() 78 + 79 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 80 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 81 + 82 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 83 + docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 84 + 85 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 86 + "type": unicodenorm.Name, 87 + "form": unicodenorm.NFC, 88 + }) 89 + if err != nil { 90 + return nil, err 91 + } 92 + 93 + err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 94 + "type": custom.Name, 95 + "char_filters": []string{}, 96 + "tokenizer": unicode.Name, 97 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 98 + }) 99 + if err != nil { 100 + return nil, err 101 + } 102 + 103 + mapping.DefaultAnalyzer = issueIndexerAnalyzer 104 + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 105 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 106 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 107 + 108 + return mapping, nil 109 + } 110 + 48 111 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 49 112 if ix.indexer != nil { 50 113 return false, errors.New("indexer is already initialized") ··· 59 122 return true, nil 60 123 } 61 124 62 - mapping := bleve.NewIndexMapping() 125 + mapping, err := generateIssueIndexMapping() 126 + if err != nil { 127 + return false, err 128 + } 63 129 indexer, err = bleve.New(ix.path, mapping) 64 130 if err != nil { 65 131 return false, err ··· 122 188 } 123 189 } 124 190 191 + // Type returns the document type, for bleve's mapping.Classifier interface. 192 + func (i *issueData) Type() string { 193 + return issueIndexerDocType 194 + } 195 + 196 + 125 197 type IssueCommentData struct { 126 198 Body string `json:"body"` 127 199 } ··· 150 222 151 223 if opts.Keyword != "" { 152 224 queries = append(queries, bleve.NewDisjunctionQuery( 153 - matchAndQuery(opts.Keyword, "title"), 154 - matchAndQuery(opts.Keyword, "body"), 225 + matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0), 226 + matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0), 155 227 )) 156 228 } 157 229 queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) ··· 178 250 return ret, nil 179 251 } 180 252 181 - func matchAndQuery(keyword, field string) query.Query { 253 + func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query { 182 254 q := bleve.NewMatchQuery(keyword) 183 255 q.FieldVal = field 256 + q.Analyzer = analyzer 257 + q.Fuzziness = fuzziness 184 258 return q 185 259 } 186 260