wip: add indexer mappings #495

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+78 -4
appview
indexer
issues
+78 -4
appview/indexer/issues/indexer.go
··· 7 "os" 8 9 "github.com/blevesearch/bleve/v2" 10 "github.com/blevesearch/bleve/v2/index/upsidedown" 11 "github.com/blevesearch/bleve/v2/search/query" 12 "tangled.org/core/appview/db" 13 "tangled.org/core/appview/indexer/base36" ··· 17 tlog "tangled.org/core/log" 18 ) 19 20 type Indexer struct { 21 indexer bleve.Index 22 path string ··· 45 l.Info("Initialized the issue indexer") 46 } 47 48 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 49 if ix.indexer != nil { 50 return false, errors.New("indexer is already initialized") ··· 59 return true, nil 60 } 61 62 - mapping := bleve.NewIndexMapping() 63 indexer, err = bleve.New(ix.path, mapping) 64 if err != nil { 65 return false, err ··· 122 } 123 } 124 125 type IssueCommentData struct { 126 Body string `json:"body"` 127 } ··· 150 151 if opts.Keyword != "" { 152 queries = append(queries, bleve.NewDisjunctionQuery( 153 - matchAndQuery(opts.Keyword, "title"), 154 - matchAndQuery(opts.Keyword, "body"), 155 )) 156 } 157 queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) ··· 178 return ret, nil 179 } 180 181 - func matchAndQuery(keyword, field string) query.Query { 182 q := bleve.NewMatchQuery(keyword) 183 q.FieldVal = field 184 return q 185 } 186
··· 7 "os" 8 9 "github.com/blevesearch/bleve/v2" 10 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 11 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 12 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 14 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 15 "github.com/blevesearch/bleve/v2/index/upsidedown" 16 + "github.com/blevesearch/bleve/v2/mapping" 17 "github.com/blevesearch/bleve/v2/search/query" 18 "tangled.org/core/appview/db" 19 "tangled.org/core/appview/indexer/base36" ··· 23 tlog "tangled.org/core/log" 24 ) 25 26 + const ( 27 + issueIndexerAnalyzer = "issueIndexer" 28 + issueIndexerDocType = "issueIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 33 type Indexer struct { 34 indexer bleve.Index 35 path string ··· 58 l.Info("Initialized the issue indexer") 59 } 60 61 + func generateIssueIndexMapping() (mapping.IndexMapping, error) { 62 + mapping := bleve.NewIndexMapping() 63 + docMapping := bleve.NewDocumentMapping() 64 + 65 + textFieldMapping := bleve.NewTextFieldMapping() 66 + textFieldMapping.Store = false 67 + textFieldMapping.IncludeInAll = false 68 + 69 + boolFieldMapping := bleve.NewBooleanFieldMapping() 70 + boolFieldMapping.Store = false 71 + boolFieldMapping.IncludeInAll = false 72 + 73 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 74 + keywordFieldMapping.Store = false 75 + keywordFieldMapping.IncludeInAll = false 76 + 77 + // numericFieldMapping := bleve.NewNumericFieldMapping() 78 + 79 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 80 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 81 + 82 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 83 + docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 84 + 85 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 86 + "type": unicodenorm.Name, 87 + "form": unicodenorm.NFC, 88 + }) 89 + if err != nil { 90 + return nil, err 91 + } 92 + 93 + err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 94 + "type": custom.Name, 95 + "char_filters": []string{}, 96 + "tokenizer": unicode.Name, 97 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 98 + }) 99 + if err != nil { 100 + return nil, err 101 + } 102 + 103 + mapping.DefaultAnalyzer = issueIndexerAnalyzer 104 + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 105 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 106 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 107 + 108 + return mapping, nil 109 + } 110 + 111 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 112 if ix.indexer != nil { 113 return false, errors.New("indexer is already initialized") ··· 122 return true, nil 123 } 124 125 + mapping, err := generateIssueIndexMapping() 126 + if err != nil { 127 + return false, err 128 + } 129 indexer, err = bleve.New(ix.path, mapping) 130 if err != nil { 131 return false, err ··· 188 } 189 } 190 191 + // Type returns the document type, for bleve's mapping.Classifier interface. 192 + func (i *issueData) Type() string { 193 + return issueIndexerDocType 194 + } 195 + 196 + 197 type IssueCommentData struct { 198 Body string `json:"body"` 199 } ··· 222 223 if opts.Keyword != "" { 224 queries = append(queries, bleve.NewDisjunctionQuery( 225 + matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0), 226 + matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0), 227 )) 228 } 229 queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) ··· 250 return ret, nil 251 } 252 253 + func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query { 254 q := bleve.NewMatchQuery(keyword) 255 q.FieldVal = field 256 + q.Analyzer = analyzer 257 + q.Fuzziness = fuzziness 258 return q 259 } 260