wip: add indexer mappings #495

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+78 -4
appview
indexer
bleve
issues
+3 -1
appview/indexer/bleve/query.go
···
 5   	"github.com/blevesearch/bleve/v2/search/query"
 6   )
 7
 8 - func MatchAndQuery(field, keyword string) query.Query {
 9   	q := bleve.NewMatchQuery(keyword)
10   	q.FieldVal = field
11   	return q
12   }
13
···
 5   	"github.com/blevesearch/bleve/v2/search/query"
 6   )
 7
 8 + func MatchAndQuery(field, keyword, analyzer string, fuzziness int) query.Query {
 9   	q := bleve.NewMatchQuery(keyword)
10   	q.FieldVal = field
11 + 	q.Analyzer = analyzer
12 + 	q.Fuzziness = fuzziness
13   	return q
14   }
15
+75 -3
appview/indexer/issues/indexer.go
···
  8   	"os"
  9
 10   	"github.com/blevesearch/bleve/v2"
 11   	"github.com/blevesearch/bleve/v2/index/upsidedown"
 12   	"github.com/blevesearch/bleve/v2/search/query"
 13   	"tangled.org/core/appview/db"
 14   	"tangled.org/core/appview/indexer/base36"
···
 18   	tlog "tangled.org/core/log"
 19   )
 20
 21   type Indexer struct {
 22   	indexer bleve.Index
 23   	path string
···
 46   	l.Info("Initialized the issue indexer")
 47   }
 48
 49   func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
 50   	if ix.indexer != nil {
 51   		return false, errors.New("indexer is already initialized")
···
 60   		return true, nil
 61   	}
 62
 63 - 	mapping := bleve.NewIndexMapping()
 64   	indexer, err = bleve.New(ix.path, mapping)
 65   	if err != nil {
 66   		return false, err
···
123   	}
124   }
125
126   type IssueCommentData struct {
127   	Body string `json:"body"`
128   }
···
151
152   	if opts.Keyword != "" {
153   		queries = append(queries, bleve.NewDisjunctionQuery(
154 - 			bleveutil.MatchAndQuery("title", opts.Keyword),
155 - 			bleveutil.MatchAndQuery("body", opts.Keyword),
156   		))
157   	}
158   	queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
···
  8   	"os"
  9
 10   	"github.com/blevesearch/bleve/v2"
 11 + 	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
 12 + 	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
 13 + 	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
 14 + 	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
 15 + 	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
 16   	"github.com/blevesearch/bleve/v2/index/upsidedown"
 17 + 	"github.com/blevesearch/bleve/v2/mapping"
 18   	"github.com/blevesearch/bleve/v2/search/query"
 19   	"tangled.org/core/appview/db"
 20   	"tangled.org/core/appview/indexer/base36"
···
 24   	tlog "tangled.org/core/log"
 25   )
 26
 27 + const (
 28 + 	issueIndexerAnalyzer = "issueIndexer"
 29 + 	issueIndexerDocType = "issueIndexerDocType"
 30 +
 31 + 	unicodeNormalizeName = "uicodeNormalize"
 32 + )
 33 +
 34   type Indexer struct {
 35   	indexer bleve.Index
 36   	path string
···
 59   	l.Info("Initialized the issue indexer")
 60   }
 61
 62 + func generateIssueIndexMapping() (mapping.IndexMapping, error) {
 63 + 	mapping := bleve.NewIndexMapping()
 64 + 	docMapping := bleve.NewDocumentMapping()
 65 +
 66 + 	textFieldMapping := bleve.NewTextFieldMapping()
 67 + 	textFieldMapping.Store = false
 68 + 	textFieldMapping.IncludeInAll = false
 69 +
 70 + 	boolFieldMapping := bleve.NewBooleanFieldMapping()
 71 + 	boolFieldMapping.Store = false
 72 + 	boolFieldMapping.IncludeInAll = false
 73 +
 74 + 	keywordFieldMapping := bleve.NewKeywordFieldMapping()
 75 + 	keywordFieldMapping.Store = false
 76 + 	keywordFieldMapping.IncludeInAll = false
 77 +
 78 + 	// numericFieldMapping := bleve.NewNumericFieldMapping()
 79 +
 80 + 	docMapping.AddFieldMappingsAt("title", textFieldMapping)
 81 + 	docMapping.AddFieldMappingsAt("body", textFieldMapping)
 82 +
 83 + 	docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
 84 + 	docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
 85 +
 86 + 	err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
 87 + 		"type": unicodenorm.Name,
 88 + 		"form": unicodenorm.NFC,
 89 + 	})
 90 + 	if err != nil {
 91 + 		return nil, err
 92 + 	}
 93 +
 94 + 	err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
 95 + 		"type": custom.Name,
 96 + 		"char_filters": []string{},
 97 + 		"tokenizer": unicode.Name,
 98 + 		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
 99 + 	})
100 + 	if err != nil {
101 + 		return nil, err
102 + 	}
103 +
104 + 	mapping.DefaultAnalyzer = issueIndexerAnalyzer
105 + 	mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
106 + 	mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
107 + 	mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
108 +
109 + 	return mapping, nil
110 + }
111 +
112   func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
113   	if ix.indexer != nil {
114   		return false, errors.New("indexer is already initialized")
···
123   		return true, nil
124   	}
125
126 + 	mapping, err := generateIssueIndexMapping()
127 + 	if err != nil {
128 + 		return false, err
129 + 	}
130   	indexer, err = bleve.New(ix.path, mapping)
131   	if err != nil {
132   		return false, err
···
189   	}
190   }
191
192 + // Type returns the document type, for bleve's mapping.Classifier interface.
193 + func (i *issueData) Type() string {
194 + 	return issueIndexerDocType
195 + }
196 +
197 +
198   type IssueCommentData struct {
199   	Body string `json:"body"`
200   }
···
223
224   	if opts.Keyword != "" {
225   		queries = append(queries, bleve.NewDisjunctionQuery(
226 + 			bleveutil.MatchAndQuery("title", opts.Keyword, issueIndexerAnalyzer, 0),
227 + 			bleveutil.MatchAndQuery("body", opts.Keyword, issueIndexerAnalyzer, 0),
228   		))
229   	}
230   	queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))