wip: add indexer mappings #495

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+88 -4
appview
indexer
issues
+88 -4
appview/indexer/issues/indexer.go
··· 1 package issues_indexer 2 3 import ( ··· 6 "os" 7 8 "github.com/blevesearch/bleve/v2" 9 "github.com/blevesearch/bleve/v2/index/upsidedown" 10 "github.com/blevesearch/bleve/v2/search/query" 11 "tangled.org/core/appview/db" 12 "tangled.org/core/appview/indexer/base36" ··· 16 tlog "tangled.org/core/log" 17 ) 18 19 type Indexer struct { 20 indexer bleve.Index 21 path string ··· 44 l.Info("Initialized the issue indexer") 45 } 46 47 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 48 if ix.indexer != nil { 49 return false, errors.New("indexer is already initialized") ··· 58 return true, nil 59 } 60 61 - mapping := bleve.NewIndexMapping() 62 indexer, err = bleve.New(ix.path, mapping) 63 if err != nil { 64 return false, err ··· 94 for _, issue := range issues { 95 dataList = append(dataList, &IssueData{ 96 ID: issue.Id, 97 IssueID: issue.IssueId, 98 Title: issue.Title, 99 Body: issue.Body, ··· 114 // IssueData data stored and will be indexed 115 type IssueData struct { 116 ID int64 `json:"id"` 117 IssueID int `json:"issue_id"` 118 Title string `json:"title"` 119 Body string `json:"body"` ··· 122 Comments []IssueCommentData `json:"comments"` 123 } 124 125 type IssueCommentData struct { 126 Body string `json:"body"` 127 } ··· 149 150 if opts.Keyword != "" { 151 queries = append(queries, bleve.NewDisjunctionQuery( 152 - matchAndQuery(opts.Keyword, "title"), 153 - matchAndQuery(opts.Keyword, "body"), 154 )) 155 } 156 queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open")) 157 // TODO: append more queries 158 ··· 176 return ret, nil 177 } 178 179 - func matchAndQuery(keyword, field string) query.Query { 180 q := bleve.NewMatchQuery(keyword) 181 q.FieldVal = field 182 return q 183 } 184 ··· 187 q.FieldVal = field 188 return q 189 }
··· 1 + // heavily inspired by gitea's model (basically copy-pasted) 2 package issues_indexer 3 4 import ( ··· 7 "os" 8 9 "github.com/blevesearch/bleve/v2" 10 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 11 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 12 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 14 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 15 "github.com/blevesearch/bleve/v2/index/upsidedown" 16 + "github.com/blevesearch/bleve/v2/mapping" 17 "github.com/blevesearch/bleve/v2/search/query" 18 "tangled.org/core/appview/db" 19 "tangled.org/core/appview/indexer/base36" ··· 23 tlog "tangled.org/core/log" 24 ) 25 26 + const ( 27 + issueIndexerAnalyzer = "issueIndexer" 28 + issueIndexerDocType = "issueIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 33 type Indexer struct { 34 indexer bleve.Index 35 path string ··· 58 l.Info("Initialized the issue indexer") 59 } 60 61 + func generateIssueIndexMapping() (mapping.IndexMapping, error) { 62 + mapping := bleve.NewIndexMapping() 63 + docMapping := bleve.NewDocumentMapping() 64 + 65 + textFieldMapping := bleve.NewTextFieldMapping() 66 + textFieldMapping.Store = false 67 + textFieldMapping.IncludeInAll = false 68 + 69 + boolFieldMapping := bleve.NewBooleanFieldMapping() 70 + boolFieldMapping.Store = false 71 + boolFieldMapping.IncludeInAll = false 72 + 73 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 74 + keywordFieldMapping.Store = false 75 + keywordFieldMapping.IncludeInAll = false 76 + 77 + // numericFieldMapping := bleve.NewNumericFieldMapping() 78 + 79 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 80 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 81 + 82 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 83 + docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 84 + 85 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 86 + "type": unicodenorm.Name, 87 + "form": unicodenorm.NFC, 88 + }) 89 + if err != nil { 90 + return nil, err 91 + } 92 + 93 + err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 94 + "type": custom.Name, 95 + "char_filters": []string{}, 96 + "tokenizer": unicode.Name, 97 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 98 + }) 99 + if err != nil { 100 + return nil, err 101 + } 102 + 103 + mapping.DefaultAnalyzer = issueIndexerAnalyzer 104 + mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 105 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 106 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 107 + 108 + return mapping, nil 109 + } 110 + 111 func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 112 if ix.indexer != nil { 113 return false, errors.New("indexer is already initialized") ··· 122 return true, nil 123 } 124 125 + mapping, err := generateIssueIndexMapping() 126 + if err != nil { 127 + return false, err 128 + } 129 indexer, err = bleve.New(ix.path, mapping) 130 if err != nil { 131 return false, err ··· 161 for _, issue := range issues { 162 dataList = append(dataList, &IssueData{ 163 ID: issue.Id, 164 + RepoAt: issue.RepoAt.String(), 165 IssueID: issue.IssueId, 166 Title: issue.Title, 167 Body: issue.Body, ··· 182 // IssueData data stored and will be indexed 183 type IssueData struct { 184 ID int64 `json:"id"` 185 + RepoAt string `json:"repo_at"` 186 IssueID int `json:"issue_id"` 187 Title string `json:"title"` 188 Body string `json:"body"` ··· 191 Comments []IssueCommentData `json:"comments"` 192 } 193 194 + // Type returns the document type, for bleve's mapping.Classifier interface. 195 + func (i *IssueData) Type() string { 196 + return issueIndexerDocType 197 + } 198 + 199 + 200 type IssueCommentData struct { 201 Body string `json:"body"` 202 } ··· 224 225 if opts.Keyword != "" { 226 queries = append(queries, bleve.NewDisjunctionQuery( 227 + matchAndQuery(opts.Keyword, "title", issueIndexerAnalyzer, 0), 228 + matchAndQuery(opts.Keyword, "body", issueIndexerAnalyzer, 0), 229 )) 230 } 231 + queries = append(queries, keywordFieldQuery(opts.RepoAt, "repo_at")) 232 queries = append(queries, boolFieldQuery(opts.IsOpen, "is_open")) 233 // TODO: append more queries 234 ··· 252 return ret, nil 253 } 254 255 + func matchAndQuery(keyword, field, analyzer string, fuzziness int) query.Query { 256 q := bleve.NewMatchQuery(keyword) 257 q.FieldVal = field 258 + q.Analyzer = analyzer 259 + q.Fuzziness = fuzziness 260 return q 261 } 262 ··· 265 q.FieldVal = field 266 return q 267 } 268 + 269 + func keywordFieldQuery(keyword, field string) query.Query { 270 + q := bleve.NewTermQuery(keyword) 271 + q.FieldVal = field 272 + return q 273 + }