appview/indexer: add pulls indexer #708

merged
opened by boltless.me targeting master from boltless.me/core: feat/search
Changed files
+296
appview
indexer
models
pulls
+3
appview/indexer/indexer.go
··· 6 6 7 7 "tangled.org/core/appview/db" 8 8 issues_indexer "tangled.org/core/appview/indexer/issues" 9 + pulls_indexer "tangled.org/core/appview/indexer/pulls" 9 10 "tangled.org/core/appview/notify" 10 11 tlog "tangled.org/core/log" 11 12 ) 12 13 13 14 type Indexer struct { 14 15 Issues *issues_indexer.Indexer 16 + Pulls *pulls_indexer.Indexer 15 17 logger *slog.Logger 16 18 notify.BaseNotifier 17 19 } ··· 29 31 func (ix *Indexer) Init(ctx context.Context, db *db.DB) error { 30 32 ctx = tlog.IntoContext(ctx, ix.logger) 31 33 ix.Issues.Init(ctx, db) 34 + ix.Pulls.Init(ctx, db) 32 35 return nil 33 36 }
+27
appview/indexer/notifier.go
··· 36 36 l.Error("failed to delete an issue", "err", err) 37 37 } 38 38 } 39 + 40 + func (ix *Indexer) NewPull(ctx context.Context, pull *models.Pull) { 41 + l := log.FromContext(ctx).With("notifier", "indexer", "pull", pull) 42 + l.Debug("indexing new pr") 43 + err := ix.Pulls.Index(ctx, pull) 44 + if err != nil { 45 + l.Error("failed to index a pr", "err", err) 46 + } 47 + } 48 + 49 + func (ix *Indexer) NewPullMerged(ctx context.Context, pull *models.Pull) { 50 + l := log.FromContext(ctx).With("notifier", "indexer", "pull", pull) 51 + l.Debug("updating a pr") 52 + err := ix.Pulls.Index(ctx, pull) 53 + if err != nil { 54 + l.Error("failed to index a pr", "err", err) 55 + } 56 + } 57 + 58 + func (ix *Indexer) NewPullClosed(ctx context.Context, pull *models.Pull) { 59 + l := log.FromContext(ctx).With("notifier", "indexer", "pull", pull) 60 + l.Debug("updating a pr") 61 + err := ix.Pulls.Index(ctx, pull) 62 + if err != nil { 63 + l.Error("failed to index a pr", "err", err) 64 + } 65 + }
+255
appview/indexer/pulls/indexer.go
··· 1 + // heavily inspired by gitea's model (basically copy-pasted) 2 + package pulls_indexer 3 + 4 + import ( 5 + "context" 6 + "errors" 7 + "log" 8 + "os" 9 + 10 + "github.com/blevesearch/bleve/v2" 11 + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 + "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 + "github.com/blevesearch/bleve/v2/index/upsidedown" 17 + "github.com/blevesearch/bleve/v2/mapping" 18 + "github.com/blevesearch/bleve/v2/search/query" 19 + "tangled.org/core/appview/db" 20 + "tangled.org/core/appview/indexer/base36" 21 + "tangled.org/core/appview/indexer/bleve" 22 + "tangled.org/core/appview/models" 23 + tlog "tangled.org/core/log" 24 + ) 25 + 26 + const ( 27 + pullIndexerAnalyzer = "pullIndexer" 28 + pullIndexerDocType = "pullIndexerDocType" 29 + 30 + unicodeNormalizeName = "uicodeNormalize" 31 + ) 32 + 33 + type Indexer struct { 34 + indexer bleve.Index 35 + path string 36 + } 37 + 38 + func NewIndexer(indexDir string) *Indexer { 39 + return &Indexer{ 40 + path: indexDir, 41 + } 42 + } 43 + 44 + // Init initializes the indexer 45 + func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 46 + l := tlog.FromContext(ctx) 47 + existed, err := ix.intialize(ctx) 48 + if err != nil { 49 + log.Fatalln("failed to initialize pull indexer", err) 50 + } 51 + if !existed { 52 + l.Debug("Populating the pull indexer") 53 + err := PopulateIndexer(ctx, ix, e) 54 + if err != nil { 55 + log.Fatalln("failed to populate pull indexer", err) 56 + } 57 + } 58 + l.Info("Initialized the pull indexer") 59 + } 60 + 61 + func generatePullIndexMapping() (mapping.IndexMapping, error) { 62 + mapping := bleve.NewIndexMapping() 63 + docMapping := bleve.NewDocumentMapping() 64 + 65 + textFieldMapping := bleve.NewTextFieldMapping() 66 + textFieldMapping.Store = false 67 + textFieldMapping.IncludeInAll = false 68 + 69 + keywordFieldMapping := bleve.NewKeywordFieldMapping() 70 + keywordFieldMapping.Store = false 71 + keywordFieldMapping.IncludeInAll = false 72 + 73 + // numericFieldMapping := bleve.NewNumericFieldMapping() 74 + 75 + docMapping.AddFieldMappingsAt("title", textFieldMapping) 76 + docMapping.AddFieldMappingsAt("body", textFieldMapping) 77 + 78 + docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 79 + docMapping.AddFieldMappingsAt("state", keywordFieldMapping) 80 + 81 + err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 82 + "type": unicodenorm.Name, 83 + "form": unicodenorm.NFC, 84 + }) 85 + if err != nil { 86 + return nil, err 87 + } 88 + 89 + err = mapping.AddCustomAnalyzer(pullIndexerAnalyzer, map[string]any{ 90 + "type": custom.Name, 91 + "char_filters": []string{}, 92 + "tokenizer": unicode.Name, 93 + "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 94 + }) 95 + if err != nil { 96 + return nil, err 97 + } 98 + 99 + mapping.DefaultAnalyzer = pullIndexerAnalyzer 100 + mapping.AddDocumentMapping(pullIndexerDocType, docMapping) 101 + mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 102 + mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 103 + 104 + return mapping, nil 105 + } 106 + 107 + func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 108 + if ix.indexer != nil { 109 + return false, errors.New("indexer is already initialized") 110 + } 111 + 112 + indexer, err := openIndexer(ctx, ix.path) 113 + if err != nil { 114 + return false, err 115 + } 116 + if indexer != nil { 117 + ix.indexer = indexer 118 + return true, nil 119 + } 120 + 121 + mapping, err := generatePullIndexMapping() 122 + if err != nil { 123 + return false, err 124 + } 125 + indexer, err = bleve.New(ix.path, mapping) 126 + if err != nil { 127 + return false, err 128 + } 129 + 130 + ix.indexer = indexer 131 + 132 + return false, nil 133 + } 134 + 135 + func openIndexer(ctx context.Context, path string) (bleve.Index, error) { 136 + l := tlog.FromContext(ctx) 137 + indexer, err := bleve.Open(path) 138 + if err != nil { 139 + if errors.Is(err, upsidedown.IncompatibleVersion) { 140 + l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 141 + return nil, os.RemoveAll(path) 142 + } 143 + return nil, nil 144 + } 145 + return indexer, nil 146 + } 147 + 148 + func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 149 + l := tlog.FromContext(ctx) 150 + 151 + pulls, err := db.GetPulls(e) 152 + if err != nil { 153 + return err 154 + } 155 + count := len(pulls) 156 + err = ix.Index(ctx, pulls...) 157 + if err != nil { 158 + return err 159 + } 160 + l.Info("pulls indexed", "count", count) 161 + return err 162 + } 163 + 164 + // pullData data stored and will be indexed 165 + type pullData struct { 166 + ID int64 `json:"id"` 167 + RepoAt string `json:"repo_at"` 168 + PullID int `json:"pull_id"` 169 + Title string `json:"title"` 170 + Body string `json:"body"` 171 + State string `json:"state"` 172 + 173 + Comments []pullCommentData `json:"comments"` 174 + } 175 + 176 + func makePullData(pull *models.Pull) *pullData { 177 + return &pullData{ 178 + ID: int64(pull.ID), 179 + RepoAt: pull.RepoAt.String(), 180 + PullID: pull.PullId, 181 + Title: pull.Title, 182 + Body: pull.Body, 183 + State: pull.State.String(), 184 + } 185 + } 186 + 187 + // Type returns the document type, for bleve's mapping.Classifier interface. 188 + func (i *pullData) Type() string { 189 + return pullIndexerDocType 190 + } 191 + 192 + type pullCommentData struct { 193 + Body string `json:"body"` 194 + } 195 + 196 + type searchResult struct { 197 + Hits []int64 198 + Total uint64 199 + } 200 + 201 + const maxBatchSize = 20 202 + 203 + func (ix *Indexer) Index(ctx context.Context, pulls ...*models.Pull) error { 204 + batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 205 + for _, pull := range pulls { 206 + pullData := makePullData(pull) 207 + if err := batch.Index(base36.Encode(pullData.ID), pullData); err != nil { 208 + return err 209 + } 210 + } 211 + return batch.Flush() 212 + } 213 + 214 + func (ix *Indexer) Delete(ctx context.Context, pullID int64) error { 215 + return ix.indexer.Delete(base36.Encode(pullID)) 216 + } 217 + 218 + // Search searches for pulls 219 + func (ix *Indexer) Search(ctx context.Context, opts models.PullSearchOptions) (*searchResult, error) { 220 + var queries []query.Query 221 + 222 + // TODO(boltless): remove this after implementing pulls page pagination 223 + limit := opts.Page.Limit 224 + if limit == 0 { 225 + limit = 500 226 + } 227 + 228 + if opts.Keyword != "" { 229 + queries = append(queries, bleve.NewDisjunctionQuery( 230 + bleveutil.MatchAndQuery("title", opts.Keyword, pullIndexerAnalyzer, 0), 231 + bleveutil.MatchAndQuery("body", opts.Keyword, pullIndexerAnalyzer, 0), 232 + )) 233 + } 234 + queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 235 + queries = append(queries, bleveutil.KeywordFieldQuery("state", opts.State.String())) 236 + 237 + var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) 238 + searchReq := bleve.NewSearchRequestOptions(indexerQuery, limit, opts.Page.Offset, false) 239 + res, err := ix.indexer.SearchInContext(ctx, searchReq) 240 + if err != nil { 241 + return nil, nil 242 + } 243 + ret := &searchResult{ 244 + Total: res.Total, 245 + Hits: make([]int64, len(res.Hits)), 246 + } 247 + for i, hit := range res.Hits { 248 + id, err := base36.Decode(hit.ID) 249 + if err != nil { 250 + return nil, err 251 + } 252 + ret.Hits[i] = id 253 + } 254 + return ret, nil 255 + }
+8
appview/models/search.go
··· 10 10 Page pagination.Page 11 11 } 12 12 13 + type PullSearchOptions struct { 14 + Keyword string 15 + RepoAt string 16 + State PullState 17 + 18 + Page pagination.Page 19 + } 20 + 13 21 // func (so *SearchOptions) ToFilters() []filter { 14 22 // var filters []filter 15 23 // if so.IsOpen != nil {
+3
appview/pulls/pulls.go
··· 2170 2170 s.pages.Notice(w, "pull-merge-error", "Failed to merge pull request. Try again later.") 2171 2171 return 2172 2172 } 2173 + p.State = models.PullMerged 2173 2174 } 2174 2175 2175 2176 err = tx.Commit() ··· 2243 2244 s.pages.Notice(w, "pull-close", "Failed to close pull.") 2244 2245 return 2245 2246 } 2247 + p.State = models.PullClosed 2246 2248 } 2247 2249 2248 2250 // Commit the transaction ··· 2315 2317 s.pages.Notice(w, "pull-close", "Failed to close pull.") 2316 2318 return 2317 2319 } 2320 + p.State = models.PullOpen 2318 2321 } 2319 2322 2320 2323 // Commit the transaction