cli + tui to publish to leaflet (wip) & manage tasks, notes & watch/read lists 🍃
charm leaflet readability golang

feat: implement document management functionality for search

* developed search engine capabilities for documents using TF-IDF indexing
* added utility functions for tokenization and term-frequency counting of document content
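
Scoring here is plain TF-IDF: each matching document accumulates `tf * ln(N/df)` per query term, where `tf` is the term's count in the document, `N` is the corpus size, and `df` is the number of documents containing the term (see `internal/documents/documents.go` below). A minimal sketch of that arithmetic on a toy corpus:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Toy corpus: 3 documents; the query term "go" appears in 2 of them.
	numDocs, df := 3.0, 2.0
	idf := math.Log(numDocs / df) // ln(3/2) ≈ 0.405

	// Document A contains "go" three times, document B once, document C never.
	scoreA := 3 * idf // ≈ 1.216
	scoreB := 1 * idf // ≈ 0.405
	fmt.Printf("A=%.3f B=%.3f C=0\n", scoreA, scoreB)
}
```

Raw term frequency boosts a document's score, damped by how common the term is across the corpus; results are then sorted by total score.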

+1427 -5
+8
cmd/main.go
··· 24 24 newBookHandler = handlers.NewBookHandler 25 25 newArticleHandler = handlers.NewArticleHandler 26 26 newPublicationHandler = handlers.NewPublicationHandler 27 + newDocumentHandler = handlers.NewDocumentHandler 27 28 exc = fang.Execute 28 29 ) 29 30 ··· 213 214 return 1 214 215 } 215 216 217 + documentHandler, err := newDocumentHandler() 218 + if err != nil { 219 + log.Error("failed to create document handler", "err", err) 220 + return 1 221 + } 222 + 216 223 root := rootCmd() 217 224 218 225 coreGroups := []CommandGroup{ ··· 220 227 NewNoteCommand(noteHandler), 221 228 NewPublicationCommand(publicationHandler), 222 229 NewArticleCommand(articleHandler), 230 + NewSearchCommand(documentHandler), 223 231 } 224 232 225 233 for _, group := range coreGroups {
+67
cmd/search_commands.go
··· 1 + package main 2 + 3 + import ( 4 + "strings" 5 + 6 + "github.com/spf13/cobra" 7 + "github.com/stormlightlabs/noteleaf/internal/handlers" 8 + ) 9 + 10 + // SearchCommand implements [CommandGroup] for document search commands 11 + type SearchCommand struct { 12 + handler *handlers.DocumentHandler 13 + } 14 + 15 + // NewSearchCommand creates a new search command group 16 + func NewSearchCommand(handler *handlers.DocumentHandler) *SearchCommand { 17 + return &SearchCommand{handler: handler} 18 + } 19 + 20 + func (c *SearchCommand) Create() *cobra.Command { 21 + root := &cobra.Command{ 22 + Use: "search", 23 + Short: "Search notes using TF-IDF", 24 + Long: `Full-text search for notes using Term Frequency-Inverse Document Frequency (TF-IDF) ranking. 25 + 26 + The search engine tokenizes text, builds an inverted index, and ranks results by relevance. 27 + Results are sorted by TF-IDF score, with higher scores indicating better matches.`, 28 + } 29 + 30 + queryCmd := &cobra.Command{ 31 + Use: "query [search terms...]", 32 + Short: "Search for documents matching query terms", 33 + Long: `Search for documents using TF-IDF ranking. 34 + 35 + Examples: 36 + noteleaf search query go programming 37 + noteleaf search query "machine learning" --limit 5`, 38 + Args: cobra.MinimumNArgs(1), 39 + RunE: func(cmd *cobra.Command, args []string) error { 40 + query := strings.Join(args, " ") 41 + limit, _ := cmd.Flags().GetInt("limit") 42 + 43 + return c.handler.Search(cmd.Context(), query, limit) 44 + }, 45 + } 46 + queryCmd.Flags().IntP("limit", "l", 10, "Maximum number of results to return") 47 + root.AddCommand(queryCmd) 48 + 49 + rebuildCmd := &cobra.Command{ 50 + Use: "rebuild", 51 + Short: "Rebuild search index from notes", 52 + Long: `Rebuild the search index from all notes in the database. 53 + 54 + This command: 55 + 1. Clears the existing document index 56 + 2. Copies all notes to the documents table 57 + 3. Builds a new TF-IDF search index 58 + 59 + Run this after adding, updating, or deleting notes to refresh the search index.`, 60 + RunE: func(cmd *cobra.Command, args []string) error { 61 + return c.handler.RebuildIndex(cmd.Context()) 62 + }, 63 + } 64 + root.AddCommand(rebuildCmd) 65 + 66 + return root 67 + }
+91 -5
internal/docs/ROADMAP.md
··· 110 110 - Invalid IDs 111 111 - Invalid flags 112 112 - Schema corruption (already tested in repo) 113 - - [ ] Test cross-platform behavior (Linux/macOS/Windows). 114 113 115 114 ### Packaging 116 115 ··· 134 133 - [ ] Operations 135 134 - [ ] `annotate` 136 135 - [ ] Bulk edit and undo/history 137 - - [ ] `$EDITOR` integration 138 136 - [x] Reports and Views 139 137 - [x] Next actions 140 138 - [x] Completed/waiting/blocked reports ··· 150 148 ### Notes 151 149 152 150 - [ ] Commands 153 - - [ ] `note search` 151 + - [x] `note search` - TF-IDF search via `search query` command 154 152 - [ ] `note tag` 155 153 - [ ] `note recent` 156 154 - [ ] `note templates` 157 155 - [ ] `note archive` 158 156 - [ ] `note export` 159 157 - [ ] Features 160 - - [ ] Full-text search 158 + - [x] Full-text search - TF-IDF ranking with Unicode tokenization 161 159 - [ ] Linking between notes, tasks, and media 160 + 161 + ### Search 162 + 163 + #### Ranking Improvements 164 + 165 + - [ ] BM25 scoring algorithm 166 + - [ ] Implement Okapi BM25 with configurable parameters (k1, b) 167 + - [ ] Field-aware BM25F with title/body weighting 168 + - [ ] Pluggable scoring strategy interface (TF-IDF/BM25 interchangeable) 169 + - [ ] Benchmark against TF-IDF on sample corpus 170 + 171 + #### Query Features 172 + 173 + - [ ] Phrase and proximity queries 174 + - [ ] Positional inverted index (track term positions in documents) 175 + - [ ] Exact phrase matching (`"go programming"`) 176 + - [ ] Proximity scoring (boost when terms appear near each other) 177 + - [ ] Query understanding 178 + - [ ] Synonym expansion with configurable dictionaries 179 + - [ ] Boolean operators (AND, OR, NOT) 180 + - [ ] Field-specific queries (`title:golang body:concurrency`) 181 + - [ ] Spelling correction with edit distance suggestions 182 + - [ ] Query boosting syntax (`title^3 golang`) 183 + 184 + #### Linguistic Processing 185 + 186 + - [ ] Text normalization 187 + - [ ] Porter stemmer for English (run/runs/running โ†’ run) 188 + - [ ] Stopword filtering with domain-specific lists 189 + - [ ] Unicode normalization and diacritic folding 190 + - [ ] Configurable token filter pipeline 191 + - [ ] Multi-language support 192 + - [ ] Language detection 193 + - [ ] Language-specific stemmers 194 + - [ ] CJK tokenization improvements 195 + 196 + #### Advanced Scoring 197 + 198 + - [ ] Learning to Rank 199 + - [ ] Feature extraction (TF-IDF/BM25 scores, term coverage, recency) 200 + - [ ] Click-through rate tracking for relevance feedback 201 + - [ ] Gradient-boosted tree models for re-ranking 202 + - [ ] Evaluation metrics (NDCG, MAP) 203 + - [ ] Non-text signals 204 + - [ ] Document recency scoring 205 + - [ ] Tag-based relevance 206 + - [ ] User interaction signals 207 + 208 + #### Index Management 209 + 210 + - [ ] Persistence and optimization 211 + - [ ] On-disk index snapshots (gob serialization) 212 + - [ ] Segmented indexing with periodic merging (Lucene-style) 213 + - [ ] Incremental updates (add/update/delete without full rebuild) 214 + - [ ] Index versioning and rollback 215 + - [ ] Compression for large corpora 216 + - [ ] Performance 217 + - [ ] Index build benchmarks vs corpus size 218 + - [ ] Query latency monitoring 219 + - [ ] Memory usage profiling 220 + - [ ] Concurrent search support 221 + 222 + #### User Experience 223 + 224 + - [ ] Interactive search interface 225 + - [ ] TUI with real-time search-as-you-type 226 + - [ ] Result navigation with vim keybindings 227 + - [ ] Preview pane showing full note content 228 + - [ ] 
Filtering by tags, date ranges, doc kind 229 + - [ ] Sort options (relevance, date, alphabetical) 230 + - [ ] Quick actions (open in editor, copy ID, tag) 231 + - [ ] Search result display 232 + - [ ] Snippet generation with matched term highlighting 233 + - [ ] Configurable result limit and pagination 234 + - [ ] Score explanation mode (`--explain` flag) 235 + - [ ] Export results to JSON/CSV 236 + - [ ] CLI 237 + - [ ] Saved search queries and aliases 238 + - [ ] Search history 239 + - [ ] Query latency and result count in output 240 + - [ ] Color-coded relevance scores 162 241 163 242 ### Media 164 243 ··· 446 525 | Tasks | Urgency scoring | Complete | 447 526 | Tasks | Reports and views | Complete | 448 527 | Notes | CRUD | Complete | 449 - | Notes | Search/tagging | Planned | 528 + | Notes | Search (TF-IDF) | Complete | 529 + | Notes | Advanced search | Planned | 530 + | Notes | Tagging | Planned | 450 531 | Publications | AT Protocol sync | Complete | 451 532 | Publications | Post/patch/push | Complete | 452 533 | Publications | Markdown conversion | Complete | ··· 459 540 | System | Configuration management | Complete | 460 541 | System | Synchronization | Future | 461 542 | System | Import/export formats | Future | 543 + 544 + ## Parking Lot 545 + 546 + - [ ] Test cross-platform behavior (Linux/macOS/Windows). 547 + - [ ] `$EDITOR` integration
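
The BM25 item under "Ranking Improvements" above refers to the standard Okapi BM25 weighting. Purely as an illustration of that roadmap entry (not part of this change), a minimal per-term scorer sketched against the existing `Index` fields, assuming the conventional defaults k1 = 1.2 and b = 0.75 and the package's existing `math` import:

```go
// scoreBM25 is a hypothetical per-term scorer; the parameter names and
// defaults follow the Okapi BM25 convention and are not implemented yet.
func (idx *Index) scoreBM25(term string, k1, b float64) map[int64]float64 {
	scores := make(map[int64]float64)
	postings, ok := idx.Postings[term]
	if !ok || idx.NumDocs == 0 {
		return scores
	}

	// Average document length, used for length normalization.
	var total float64
	for _, l := range idx.DocLengths {
		total += float64(l)
	}
	avgLen := total / float64(idx.NumDocs)

	df := float64(len(postings))
	idf := math.Log(1 + (float64(idx.NumDocs)-df+0.5)/(df+0.5))

	for _, p := range postings {
		tf := float64(p.TF)
		lenNorm := k1 * (1 - b + b*float64(idx.DocLengths[p.DocID])/avgLen)
		scores[p.DocID] = idf * tf * (k1 + 1) / (tf + lenNorm)
	}
	return scores
}
```

Compared with the current `tf * ln(N/df)` scoring, BM25 saturates repeated terms and penalizes long documents, which is why the roadmap pairs it with a pluggable scoring interface.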
+153
internal/documents/documents.go
··· 1 + // Term Frequency-Inverse Document Frequency search model for notes 2 + package documents 3 + 4 + import ( 5 + "math" 6 + "regexp" 7 + "sort" 8 + "strings" 9 + "time" 10 + ) 11 + 12 + type DocKind int64 13 + 14 + const ( 15 + NoteDoc DocKind = iota 16 + ArticleDoc 17 + MovieDoc 18 + BookDoc 19 + TVDoc 20 + ) 21 + 22 + type Document struct { 23 + ID int64 24 + Title string 25 + Body string 26 + CreatedAt time.Time 27 + DocKind int64 28 + } 29 + 30 + type Posting struct { 31 + DocID int64 32 + TF int 33 + } 34 + 35 + type Index struct { 36 + Postings map[string][]Posting 37 + DocLengths map[int64]int 38 + NumDocs int 39 + } 40 + 41 + type Result struct { 42 + DocID int64 43 + Score float64 44 + } 45 + 46 + type Searchable interface { 47 + Search(query string, limit int) ([]Result, error) 48 + } 49 + 50 + // Tokenizer handles text tokenization and normalization 51 + type Tokenizer struct { 52 + pattern *regexp.Regexp 53 + } 54 + 55 + // NewTokenizer creates a new tokenizer with Unicode-aware word/number matching 56 + func NewTokenizer() *Tokenizer { 57 + return &Tokenizer{ 58 + pattern: regexp.MustCompile(`\p{L}+\p{M}*|\p{N}+`), 59 + } 60 + } 61 + 62 + // Tokenize splits text into normalized tokens (lowercase words and numbers) 63 + func (t *Tokenizer) Tokenize(text string) []string { 64 + lowered := strings.ToLower(text) 65 + return t.pattern.FindAllString(lowered, -1) 66 + } 67 + 68 + // TokenFrequency computes term frequency map for tokens 69 + func TokenFrequency(tokens []string) map[string]int { 70 + freq := make(map[string]int) 71 + for _, token := range tokens { 72 + freq[token]++ 73 + } 74 + return freq 75 + } 76 + 77 + // BuildIndex constructs a TF-IDF index from a collection of documents 78 + func BuildIndex(docs []Document) *Index { 79 + idx := &Index{ 80 + Postings: make(map[string][]Posting), 81 + DocLengths: make(map[int64]int), 82 + NumDocs: 0, 83 + } 84 + 85 + tokenizer := NewTokenizer() 86 + 87 + for _, doc := range docs { 88 + text := doc.Title + " " + doc.Body 89 + tokens := tokenizer.Tokenize(text) 90 + 91 + idx.NumDocs++ 92 + idx.DocLengths[doc.ID] = len(tokens) 93 + 94 + freq := TokenFrequency(tokens) 95 + 96 + for term, tf := range freq { 97 + idx.Postings[term] = append(idx.Postings[term], Posting{ 98 + DocID: doc.ID, 99 + TF: tf, 100 + }) 101 + } 102 + } 103 + 104 + return idx 105 + } 106 + 107 + // Search performs TF-IDF ranked search on the index 108 + func (idx *Index) Search(query string, limit int) ([]Result, error) { 109 + tokenizer := NewTokenizer() 110 + queryTokens := tokenizer.Tokenize(query) 111 + 112 + if len(queryTokens) == 0 { 113 + return []Result{}, nil 114 + } 115 + 116 + scores := make(map[int64]float64) 117 + 118 + for _, term := range queryTokens { 119 + postings, exists := idx.Postings[term] 120 + if !exists { 121 + continue 122 + } 123 + 124 + df := len(postings) 125 + idf := math.Log(float64(idx.NumDocs) / float64(df)) 126 + 127 + for _, posting := range postings { 128 + tf := float64(posting.TF) 129 + scores[posting.DocID] += tf * idf 130 + } 131 + } 132 + 133 + results := make([]Result, 0, len(scores)) 134 + for docID, score := range scores { 135 + results = append(results, Result{ 136 + DocID: docID, 137 + Score: score, 138 + }) 139 + } 140 + 141 + sort.Slice(results, func(i, j int) bool { 142 + if results[i].Score != results[j].Score { 143 + return results[i].Score > results[j].Score 144 + } 145 + return results[i].DocID > results[j].DocID 146 + }) 147 + 148 + if limit > 0 && limit < len(results) { 149 + results = results[:limit] 150 
+ } 151 + 152 + return results, nil 153 + }
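
The package exposes a small surface (`BuildIndex`, `Index.Search`, `Tokenizer`); a short usage sketch as it might appear inside the module (the `internal/` path is not importable from outside it):

```go
package main

import (
	"fmt"

	"github.com/stormlightlabs/noteleaf/internal/documents"
)

func main() {
	docs := []documents.Document{
		{ID: 1, Title: "Go Programming", Body: "Learn Go with examples"},
		{ID: 2, Title: "Python Guide", Body: "Python is versatile"},
	}

	// Build the inverted index once, then run ranked queries against it.
	idx := documents.BuildIndex(docs)

	// A non-positive limit returns every match; results arrive highest score first.
	results, err := idx.Search("go examples", 10)
	if err != nil {
		fmt.Println("search failed:", err)
		return
	}
	for _, r := range results {
		fmt.Printf("doc %d scored %.3f\n", r.DocID, r.Score)
	}
}
```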
+380
internal/documents/documents_test.go
··· 1 + package documents 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + func TestTokenizer(t *testing.T) { 9 + tokenizer := NewTokenizer() 10 + 11 + t.Run("Basic tokenization", func(t *testing.T) { 12 + t.Run("tokenizes simple text", func(t *testing.T) { 13 + tokens := tokenizer.Tokenize("Hello World") 14 + if len(tokens) != 2 { 15 + t.Fatalf("expected 2 tokens, got %d", len(tokens)) 16 + } 17 + if tokens[0] != "hello" || tokens[1] != "world" { 18 + t.Errorf("expected [hello world], got %v", tokens) 19 + } 20 + }) 21 + 22 + t.Run("lowercases all tokens", func(t *testing.T) { 23 + tokens := tokenizer.Tokenize("UPPERCASE MiXeD lowercase") 24 + if len(tokens) != 3 { 25 + t.Fatalf("expected 3 tokens, got %d", len(tokens)) 26 + } 27 + for _, token := range tokens { 28 + if token != "uppercase" && token != "mixed" && token != "lowercase" { 29 + t.Errorf("unexpected token: %s", token) 30 + } 31 + } 32 + }) 33 + 34 + t.Run("handles punctuation", func(t *testing.T) { 35 + tokens := tokenizer.Tokenize("Hello, world! How are you?") 36 + expected := []string{"hello", "world", "how", "are", "you"} 37 + if len(tokens) != len(expected) { 38 + t.Fatalf("expected %d tokens, got %d", len(expected), len(tokens)) 39 + } 40 + for i, token := range tokens { 41 + if token != expected[i] { 42 + t.Errorf("token %d: expected %s, got %s", i, expected[i], token) 43 + } 44 + } 45 + }) 46 + }) 47 + 48 + t.Run("Unicode support", func(t *testing.T) { 49 + t.Run("tokenizes unicode characters", func(t *testing.T) { 50 + tokens := tokenizer.Tokenize("cafรฉ rรฉsumรฉ naรฏve") 51 + if len(tokens) != 3 { 52 + t.Fatalf("expected 3 tokens, got %d", len(tokens)) 53 + } 54 + }) 55 + 56 + t.Run("handles emoji and special characters", func(t *testing.T) { 57 + tokens := tokenizer.Tokenize("hello ๐Ÿ˜€ world") 58 + if len(tokens) != 2 { 59 + t.Fatalf("expected 2 tokens (emoji excluded), got %d", len(tokens)) 60 + } 61 + if tokens[0] != "hello" || tokens[1] != "world" { 62 + t.Errorf("expected [hello world], got %v", tokens) 63 + } 64 + }) 65 + 66 + t.Run("tokenizes CJK characters", func(t *testing.T) { 67 + tokens := tokenizer.Tokenize("ไฝ ๅฅฝ ไธ–็•Œ") 68 + if len(tokens) != 2 { 69 + t.Fatalf("expected 2 tokens, got %d", len(tokens)) 70 + } 71 + }) 72 + }) 73 + 74 + t.Run("Numbers", func(t *testing.T) { 75 + t.Run("tokenizes numbers", func(t *testing.T) { 76 + tokens := tokenizer.Tokenize("test 123 456") 77 + if len(tokens) != 3 { 78 + t.Fatalf("expected 3 tokens, got %d", len(tokens)) 79 + } 80 + if tokens[1] != "123" || tokens[2] != "456" { 81 + t.Errorf("expected numbers to be tokenized, got %v", tokens) 82 + } 83 + }) 84 + 85 + t.Run("handles mixed alphanumeric", func(t *testing.T) { 86 + tokens := tokenizer.Tokenize("version 2 released") 87 + if len(tokens) != 3 { 88 + t.Fatalf("expected 3 tokens, got %d", len(tokens)) 89 + } 90 + }) 91 + }) 92 + 93 + t.Run("Edge cases", func(t *testing.T) { 94 + t.Run("handles empty string", func(t *testing.T) { 95 + tokens := tokenizer.Tokenize("") 96 + if len(tokens) != 0 { 97 + t.Errorf("expected 0 tokens for empty string, got %d", len(tokens)) 98 + } 99 + }) 100 + 101 + t.Run("handles whitespace only", func(t *testing.T) { 102 + tokens := tokenizer.Tokenize(" \t\n ") 103 + if len(tokens) != 0 { 104 + t.Errorf("expected 0 tokens for whitespace, got %d", len(tokens)) 105 + } 106 + }) 107 + 108 + t.Run("handles punctuation only", func(t *testing.T) { 109 + tokens := tokenizer.Tokenize("!@#$%^&*()") 110 + if len(tokens) != 0 { 111 + t.Errorf("expected 0 tokens for punctuation only, got 
%d", len(tokens)) 112 + } 113 + }) 114 + }) 115 + } 116 + 117 + func TestTokenFrequency(t *testing.T) { 118 + t.Run("counts term frequencies", func(t *testing.T) { 119 + tokens := []string{"hello", "world", "hello", "test"} 120 + freq := TokenFrequency(tokens) 121 + 122 + if freq["hello"] != 2 { 123 + t.Errorf("expected hello frequency 2, got %d", freq["hello"]) 124 + } 125 + if freq["world"] != 1 { 126 + t.Errorf("expected world frequency 1, got %d", freq["world"]) 127 + } 128 + if freq["test"] != 1 { 129 + t.Errorf("expected test frequency 1, got %d", freq["test"]) 130 + } 131 + }) 132 + 133 + t.Run("handles empty token list", func(t *testing.T) { 134 + freq := TokenFrequency([]string{}) 135 + if len(freq) != 0 { 136 + t.Errorf("expected empty frequency map, got %d entries", len(freq)) 137 + } 138 + }) 139 + 140 + t.Run("handles single token", func(t *testing.T) { 141 + freq := TokenFrequency([]string{"single"}) 142 + if freq["single"] != 1 { 143 + t.Errorf("expected frequency 1, got %d", freq["single"]) 144 + } 145 + }) 146 + } 147 + 148 + func TestBuildIndex(t *testing.T) { 149 + now := time.Now() 150 + 151 + t.Run("builds index from documents", func(t *testing.T) { 152 + docs := []Document{ 153 + {ID: 1, Title: "Go Programming", Body: "Go is a great language", CreatedAt: now, DocKind: int64(NoteDoc)}, 154 + {ID: 2, Title: "Python Guide", Body: "Python is versatile", CreatedAt: now, DocKind: int64(ArticleDoc)}, 155 + } 156 + 157 + idx := BuildIndex(docs) 158 + 159 + if idx.NumDocs != 2 { 160 + t.Errorf("expected NumDocs 2, got %d", idx.NumDocs) 161 + } 162 + 163 + if len(idx.DocLengths) != 2 { 164 + t.Errorf("expected 2 document lengths, got %d", len(idx.DocLengths)) 165 + } 166 + 167 + if idx.DocLengths[1] <= 0 || idx.DocLengths[2] <= 0 { 168 + t.Error("document lengths should be positive") 169 + } 170 + 171 + if _, exists := idx.Postings["go"]; !exists { 172 + t.Error("expected 'go' to be in postings") 173 + } 174 + if _, exists := idx.Postings["python"]; !exists { 175 + t.Error("expected 'python' to be in postings") 176 + } 177 + }) 178 + 179 + t.Run("handles empty document list", func(t *testing.T) { 180 + idx := BuildIndex([]Document{}) 181 + if idx.NumDocs != 0 { 182 + t.Errorf("expected NumDocs 0, got %d", idx.NumDocs) 183 + } 184 + if len(idx.Postings) != 0 { 185 + t.Errorf("expected empty postings, got %d entries", len(idx.Postings)) 186 + } 187 + }) 188 + 189 + t.Run("calculates term frequencies correctly", func(t *testing.T) { 190 + docs := []Document{ 191 + {ID: 1, Title: "test", Body: "test test test", CreatedAt: now, DocKind: int64(NoteDoc)}, 192 + } 193 + 194 + idx := BuildIndex(docs) 195 + 196 + postings := idx.Postings["test"] 197 + if len(postings) != 1 { 198 + t.Fatalf("expected 1 posting for 'test', got %d", len(postings)) 199 + } 200 + 201 + if postings[0].TF != 4 { 202 + t.Errorf("expected TF 4 (title + 3 in body), got %d", postings[0].TF) 203 + } 204 + }) 205 + 206 + t.Run("builds postings for multiple documents with same term", func(t *testing.T) { 207 + docs := []Document{ 208 + {ID: 1, Title: "Go", Body: "Go is great", CreatedAt: now, DocKind: int64(NoteDoc)}, 209 + {ID: 2, Title: "Go Tutorial", Body: "Learn Go", CreatedAt: now, DocKind: int64(NoteDoc)}, 210 + } 211 + 212 + idx := BuildIndex(docs) 213 + 214 + postings := idx.Postings["go"] 215 + if len(postings) != 2 { 216 + t.Fatalf("expected 2 postings for 'go', got %d", len(postings)) 217 + } 218 + }) 219 + } 220 + 221 + func TestIndexSearch(t *testing.T) { 222 + now := time.Now() 223 + 224 + t.Run("Search 
functionality", func(t *testing.T) { 225 + t.Run("returns empty results for empty query", func(t *testing.T) { 226 + docs := []Document{ 227 + {ID: 1, Title: "Test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 228 + } 229 + idx := BuildIndex(docs) 230 + 231 + results, err := idx.Search("", 10) 232 + if err != nil { 233 + t.Fatalf("unexpected error: %v", err) 234 + } 235 + if len(results) != 0 { 236 + t.Errorf("expected 0 results for empty query, got %d", len(results)) 237 + } 238 + }) 239 + 240 + t.Run("finds matching documents", func(t *testing.T) { 241 + docs := []Document{ 242 + {ID: 1, Title: "Go Programming", Body: "Learn Go language", CreatedAt: now, DocKind: int64(NoteDoc)}, 243 + {ID: 2, Title: "Python Guide", Body: "Python is versatile", CreatedAt: now, DocKind: int64(ArticleDoc)}, 244 + } 245 + idx := BuildIndex(docs) 246 + 247 + results, err := idx.Search("go", 10) 248 + if err != nil { 249 + t.Fatalf("unexpected error: %v", err) 250 + } 251 + 252 + if len(results) != 1 { 253 + t.Fatalf("expected 1 result, got %d", len(results)) 254 + } 255 + 256 + if results[0].DocID != 1 { 257 + t.Errorf("expected DocID 1, got %d", results[0].DocID) 258 + } 259 + 260 + if results[0].Score <= 0 { 261 + t.Error("expected positive score") 262 + } 263 + }) 264 + 265 + t.Run("ranks documents by relevance", func(t *testing.T) { 266 + docs := []Document{ 267 + {ID: 1, Title: "Go", Body: "tutorial python rust", CreatedAt: now, DocKind: int64(NoteDoc)}, 268 + {ID: 2, Title: "Go Programming", Body: "advanced go tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 269 + {ID: 3, Title: "Python", Body: "different language", CreatedAt: now, DocKind: int64(NoteDoc)}, 270 + } 271 + idx := BuildIndex(docs) 272 + 273 + results, err := idx.Search("go", 10) 274 + if err != nil { 275 + t.Fatalf("unexpected error: %v", err) 276 + } 277 + 278 + if len(results) != 2 { 279 + t.Fatalf("expected 2 results, got %d", len(results)) 280 + } 281 + 282 + if results[0].DocID != 2 { 283 + t.Errorf("expected document 2 to rank higher (has more 'go' terms)") 284 + } 285 + 286 + if results[0].Score <= results[1].Score { 287 + t.Errorf("expected first result to have higher score, got %f <= %f", results[0].Score, results[1].Score) 288 + } 289 + }) 290 + 291 + t.Run("respects limit parameter", func(t *testing.T) { 292 + docs := []Document{ 293 + {ID: 1, Title: "test one", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 294 + {ID: 2, Title: "test two", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 295 + {ID: 3, Title: "test three", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 296 + } 297 + idx := BuildIndex(docs) 298 + 299 + results, err := idx.Search("test", 2) 300 + if err != nil { 301 + t.Fatalf("unexpected error: %v", err) 302 + } 303 + 304 + if len(results) != 2 { 305 + t.Errorf("expected 2 results with limit=2, got %d", len(results)) 306 + } 307 + }) 308 + 309 + t.Run("handles multi-term queries", func(t *testing.T) { 310 + docs := []Document{ 311 + {ID: 1, Title: "Go Programming", Body: "advanced tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 312 + {ID: 2, Title: "Go Basics", Body: "beginner tutorial", CreatedAt: now, DocKind: int64(NoteDoc)}, 313 + {ID: 3, Title: "Python", Body: "different language", CreatedAt: now, DocKind: int64(NoteDoc)}, 314 + } 315 + idx := BuildIndex(docs) 316 + 317 + results, err := idx.Search("go tutorial", 10) 318 + if err != nil { 319 + t.Fatalf("unexpected error: %v", err) 320 + } 321 + 322 + if len(results) != 2 { 323 + t.Errorf("expected 2 
results, got %d", len(results)) 324 + } 325 + }) 326 + 327 + t.Run("returns no results for non-matching query", func(t *testing.T) { 328 + docs := []Document{ 329 + {ID: 1, Title: "Go", Body: "programming", CreatedAt: now, DocKind: int64(NoteDoc)}, 330 + } 331 + idx := BuildIndex(docs) 332 + 333 + results, err := idx.Search("rust", 10) 334 + if err != nil { 335 + t.Fatalf("unexpected error: %v", err) 336 + } 337 + 338 + if len(results) != 0 { 339 + t.Errorf("expected 0 results for non-matching query, got %d", len(results)) 340 + } 341 + }) 342 + 343 + t.Run("handles zero limit", func(t *testing.T) { 344 + docs := []Document{ 345 + {ID: 1, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 346 + } 347 + idx := BuildIndex(docs) 348 + 349 + results, err := idx.Search("test", 0) 350 + if err != nil { 351 + t.Fatalf("unexpected error: %v", err) 352 + } 353 + 354 + if len(results) != 1 { 355 + t.Errorf("expected all results with limit=0, got %d", len(results)) 356 + } 357 + }) 358 + 359 + t.Run("tie-breaking uses DocID", func(t *testing.T) { 360 + docs := []Document{ 361 + {ID: 1, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 362 + {ID: 2, Title: "test", Body: "content", CreatedAt: now, DocKind: int64(NoteDoc)}, 363 + } 364 + idx := BuildIndex(docs) 365 + 366 + results, err := idx.Search("test", 10) 367 + if err != nil { 368 + t.Fatalf("unexpected error: %v", err) 369 + } 370 + 371 + if len(results) != 2 { 372 + t.Fatalf("expected 2 results, got %d", len(results)) 373 + } 374 + 375 + if results[0].DocID <= results[1].DocID { 376 + t.Error("expected higher DocID first when scores are equal") 377 + } 378 + }) 379 + }) 380 + }
+1
internal/documents/test_utilities.go
··· 1 + package documents
+85
internal/handlers/documents.go
··· 1 + package handlers 2 + 3 + import ( 4 + "context" 5 + "database/sql" 6 + "fmt" 7 + 8 + "github.com/stormlightlabs/noteleaf/internal/repo" 9 + "github.com/stormlightlabs/noteleaf/internal/store" 10 + ) 11 + 12 + // DocumentHandler provides operations for document search 13 + type DocumentHandler struct { 14 + db *sql.DB 15 + repos *repo.Repositories 16 + engine *repo.SearchEngine 17 + } 18 + 19 + // NewDocumentHandler creates a new document handler 20 + func NewDocumentHandler() (*DocumentHandler, error) { 21 + db, err := store.NewDatabase() 22 + if err != nil { 23 + return nil, fmt.Errorf("failed to initialize database: %w", err) 24 + } 25 + 26 + repos := repo.NewRepositories(db.DB) 27 + engine := repo.NewSearchEngine(repos.Documents) 28 + 29 + return &DocumentHandler{ 30 + db: db.DB, 31 + repos: repos, 32 + engine: engine, 33 + }, nil 34 + } 35 + 36 + // RebuildIndex rebuilds the search index from notes 37 + func (h *DocumentHandler) RebuildIndex(ctx context.Context) error { 38 + if err := h.repos.Documents.DeleteAll(ctx); err != nil { 39 + return fmt.Errorf("failed to clear documents: %w", err) 40 + } 41 + 42 + if err := h.repos.Documents.RebuildFromNotes(ctx, h.repos.Notes); err != nil { 43 + return fmt.Errorf("failed to rebuild from notes: %w", err) 44 + } 45 + 46 + if err := h.engine.Rebuild(ctx); err != nil { 47 + return fmt.Errorf("failed to rebuild search index: %w", err) 48 + } 49 + 50 + fmt.Println("Search index rebuilt successfully") 51 + return nil 52 + } 53 + 54 + // Search performs a TF-IDF search and displays results 55 + func (h *DocumentHandler) Search(ctx context.Context, query string, limit int) error { 56 + if err := h.engine.Rebuild(ctx); err != nil { 57 + return fmt.Errorf("failed to rebuild index: %w", err) 58 + } 59 + 60 + results, docs, err := h.engine.SearchWithScores(ctx, query, limit) 61 + if err != nil { 62 + return fmt.Errorf("search failed: %w", err) 63 + } 64 + 65 + if len(results) == 0 { 66 + fmt.Println("No results found") 67 + return nil 68 + } 69 + 70 + fmt.Printf("Found %d results:\n\n", len(results)) 71 + for i, doc := range docs { 72 + score := results[i].Score 73 + fmt.Printf("%d. [Score: %.2f] %s\n", i+1, score, doc.Title) 74 + fmt.Printf(" %s\n\n", truncate(doc.Body, 100)) 75 + } 76 + 77 + return nil 78 + } 79 + 80 + func truncate(s string, maxLen int) string { 81 + if len(s) <= maxLen { 82 + return s 83 + } 84 + return s[:maxLen] + "..." 85 + }
+238
internal/repo/document_repository.go
··· 1 + package repo 2 + 3 + import ( 4 + "context" 5 + "database/sql" 6 + "fmt" 7 + 8 + "github.com/stormlightlabs/noteleaf/internal/documents" 9 + ) 10 + 11 + func DocumentNotFoundError(id int64) error { 12 + return fmt.Errorf("document with id %d not found", id) 13 + } 14 + 15 + // DocumentRepository provides database operations for documents 16 + type DocumentRepository struct { 17 + db *sql.DB 18 + } 19 + 20 + // NewDocumentRepository creates a new document repository 21 + func NewDocumentRepository(db *sql.DB) *DocumentRepository { 22 + return &DocumentRepository{db: db} 23 + } 24 + 25 + // scanDocument scans a database row into a Document model 26 + func (r *DocumentRepository) scanDocument(s scanner) (*documents.Document, error) { 27 + var doc documents.Document 28 + err := s.Scan(&doc.ID, &doc.Title, &doc.Body, &doc.CreatedAt, &doc.DocKind) 29 + if err != nil { 30 + return nil, err 31 + } 32 + return &doc, nil 33 + } 34 + 35 + // queryOne executes a query that returns a single document 36 + func (r *DocumentRepository) queryOne(ctx context.Context, query string, args ...any) (*documents.Document, error) { 37 + row := r.db.QueryRowContext(ctx, query, args...) 38 + doc, err := r.scanDocument(row) 39 + if err != nil { 40 + if err == sql.ErrNoRows { 41 + return nil, fmt.Errorf("document not found") 42 + } 43 + return nil, fmt.Errorf("failed to scan document: %w", err) 44 + } 45 + return doc, nil 46 + } 47 + 48 + // queryMany executes a query that returns multiple documents 49 + func (r *DocumentRepository) queryMany(ctx context.Context, query string, args ...any) ([]documents.Document, error) { 50 + rows, err := r.db.QueryContext(ctx, query, args...) 51 + if err != nil { 52 + return nil, fmt.Errorf("failed to query documents: %w", err) 53 + } 54 + defer rows.Close() 55 + 56 + var docs []documents.Document 57 + for rows.Next() { 58 + doc, err := r.scanDocument(rows) 59 + if err != nil { 60 + return nil, fmt.Errorf("failed to scan document: %w", err) 61 + } 62 + docs = append(docs, *doc) 63 + } 64 + 65 + if err := rows.Err(); err != nil { 66 + return nil, fmt.Errorf("error iterating over documents: %w", err) 67 + } 68 + 69 + return docs, nil 70 + } 71 + 72 + // Create stores a new document and returns its assigned ID 73 + func (r *DocumentRepository) Create(ctx context.Context, doc *documents.Document) (int64, error) { 74 + result, err := r.db.ExecContext(ctx, queryDocumentInsert, 75 + doc.Title, doc.Body, doc.CreatedAt, doc.DocKind) 76 + if err != nil { 77 + return 0, fmt.Errorf("failed to insert document: %w", err) 78 + } 79 + 80 + id, err := result.LastInsertId() 81 + if err != nil { 82 + return 0, fmt.Errorf("failed to get last insert id: %w", err) 83 + } 84 + 85 + doc.ID = id 86 + return id, nil 87 + } 88 + 89 + // Get retrieves a document by its ID 90 + func (r *DocumentRepository) Get(ctx context.Context, id int64) (*documents.Document, error) { 91 + doc, err := r.queryOne(ctx, queryDocumentByID, id) 92 + if err != nil { 93 + return nil, DocumentNotFoundError(id) 94 + } 95 + return doc, nil 96 + } 97 + 98 + // Delete removes a document by its ID 99 + func (r *DocumentRepository) Delete(ctx context.Context, id int64) error { 100 + result, err := r.db.ExecContext(ctx, queryDocumentDelete, id) 101 + if err != nil { 102 + return fmt.Errorf("failed to delete document: %w", err) 103 + } 104 + 105 + rowsAffected, err := result.RowsAffected() 106 + if err != nil { 107 + return fmt.Errorf("failed to get rows affected: %w", err) 108 + } 109 + 110 + if rowsAffected == 0 { 111 + return 
DocumentNotFoundError(id) 112 + } 113 + 114 + return nil 115 + } 116 + 117 + // List retrieves all documents 118 + func (r *DocumentRepository) List(ctx context.Context) ([]documents.Document, error) { 119 + return r.queryMany(ctx, queryDocumentsList) 120 + } 121 + 122 + // ListByKind retrieves documents of a specific kind 123 + func (r *DocumentRepository) ListByKind(ctx context.Context, kind documents.DocKind) ([]documents.Document, error) { 124 + return r.queryMany(ctx, queryDocumentsByKind, int64(kind)) 125 + } 126 + 127 + // DeleteAll removes all documents from the database 128 + func (r *DocumentRepository) DeleteAll(ctx context.Context) error { 129 + _, err := r.db.ExecContext(ctx, queryDocumentsDeleteAll) 130 + if err != nil { 131 + return fmt.Errorf("failed to delete all documents: %w", err) 132 + } 133 + return nil 134 + } 135 + 136 + // RebuildFromNotes rebuilds the documents table from notes 137 + func (r *DocumentRepository) RebuildFromNotes(ctx context.Context, noteRepo *NoteRepository) error { 138 + notes, err := noteRepo.List(ctx, NoteListOptions{}) 139 + if err != nil { 140 + return fmt.Errorf("failed to list notes: %w", err) 141 + } 142 + 143 + for _, note := range notes { 144 + doc := &documents.Document{ 145 + Title: note.Title, 146 + Body: note.Content, 147 + CreatedAt: note.Created, 148 + DocKind: int64(documents.NoteDoc), 149 + } 150 + 151 + if _, err := r.Create(ctx, doc); err != nil { 152 + return fmt.Errorf("failed to create document from note %d: %w", note.ID, err) 153 + } 154 + } 155 + 156 + return nil 157 + } 158 + 159 + // BuildIndex creates a TF-IDF index from all documents in the database 160 + func (r *DocumentRepository) BuildIndex(ctx context.Context) (*documents.Index, error) { 161 + docs, err := r.List(ctx) 162 + if err != nil { 163 + return nil, fmt.Errorf("failed to list documents: %w", err) 164 + } 165 + 166 + return documents.BuildIndex(docs), nil 167 + } 168 + 169 + // SearchEngine wraps a DocumentRepository with search capabilities 170 + type SearchEngine struct { 171 + repo *DocumentRepository 172 + index *documents.Index 173 + } 174 + 175 + // NewSearchEngine creates a new search engine with the given repository 176 + func NewSearchEngine(repo *DocumentRepository) *SearchEngine { 177 + return &SearchEngine{ 178 + repo: repo, 179 + index: nil, 180 + } 181 + } 182 + 183 + // Rebuild rebuilds the search index from the database 184 + func (se *SearchEngine) Rebuild(ctx context.Context) error { 185 + idx, err := se.repo.BuildIndex(ctx) 186 + if err != nil { 187 + return fmt.Errorf("failed to build index: %w", err) 188 + } 189 + 190 + se.index = idx 191 + return nil 192 + } 193 + 194 + // Search performs a TF-IDF search and returns matching documents 195 + func (se *SearchEngine) Search(ctx context.Context, query string, limit int) ([]documents.Document, error) { 196 + if se.index == nil { 197 + return nil, fmt.Errorf("search index not initialized") 198 + } 199 + 200 + results, err := se.index.Search(query, limit) 201 + if err != nil { 202 + return nil, fmt.Errorf("failed to search: %w", err) 203 + } 204 + 205 + docs := make([]documents.Document, 0, len(results)) 206 + for _, result := range results { 207 + doc, err := se.repo.Get(ctx, result.DocID) 208 + if err != nil { 209 + return nil, fmt.Errorf("failed to get document %d: %w", result.DocID, err) 210 + } 211 + docs = append(docs, *doc) 212 + } 213 + 214 + return docs, nil 215 + } 216 + 217 + // SearchWithScores performs a TF-IDF search and returns results with scores 218 + func (se *SearchEngine) 
SearchWithScores(ctx context.Context, query string, limit int) ([]documents.Result, []documents.Document, error) { 219 + if se.index == nil { 220 + return nil, nil, fmt.Errorf("search index not initialized") 221 + } 222 + 223 + results, err := se.index.Search(query, limit) 224 + if err != nil { 225 + return nil, nil, fmt.Errorf("failed to search: %w", err) 226 + } 227 + 228 + docs := make([]documents.Document, 0, len(results)) 229 + for _, result := range results { 230 + doc, err := se.repo.Get(ctx, result.DocID) 231 + if err != nil { 232 + return nil, nil, fmt.Errorf("failed to get document %d: %w", result.DocID, err) 233 + } 234 + docs = append(docs, *doc) 235 + } 236 + 237 + return results, docs, nil 238 + }
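
This mirrors what `DocumentHandler.Search` does one layer up; as a standalone reference, a hedged sketch of driving `SearchEngine` directly at the repository layer (the package and function names here are hypothetical, and it assumes an open `*sql.DB` with migration 0010 applied):

```go
package example

import (
	"context"
	"database/sql"
	"fmt"

	"github.com/stormlightlabs/noteleaf/internal/repo"
)

// RunSearch builds the in-memory index and prints scored results.
func RunSearch(ctx context.Context, db *sql.DB, query string, limit int) error {
	documentsRepo := repo.NewDocumentRepository(db)
	engine := repo.NewSearchEngine(documentsRepo)

	// The index lives in memory only, so it must be rebuilt before the first query.
	if err := engine.Rebuild(ctx); err != nil {
		return err
	}

	results, docs, err := engine.SearchWithScores(ctx, query, limit)
	if err != nil {
		return err
	}
	for i, doc := range docs {
		fmt.Printf("%.2f  %s\n", results[i].Score, doc.Title)
	}
	return nil
}
```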
+379
internal/repo/document_repository_test.go
··· 1 + package repo 2 + 3 + import ( 4 + "context" 5 + "testing" 6 + "time" 7 + 8 + "github.com/stormlightlabs/noteleaf/internal/documents" 9 + "github.com/stormlightlabs/noteleaf/internal/shared" 10 + ) 11 + 12 + func CreateSampleDocument() *documents.Document { 13 + return &documents.Document{ 14 + Title: "Test Document", 15 + Body: "This is test content for searching", 16 + CreatedAt: time.Now(), 17 + DocKind: int64(documents.NoteDoc), 18 + } 19 + } 20 + 21 + func TestDocumentRepository(t *testing.T) { 22 + db := CreateTestDB(t) 23 + repo := NewDocumentRepository(db) 24 + ctx := context.Background() 25 + 26 + t.Run("Create", func(t *testing.T) { 27 + t.Run("creates document successfully", func(t *testing.T) { 28 + doc := CreateSampleDocument() 29 + id, err := repo.Create(ctx, doc) 30 + 31 + shared.AssertNoError(t, err, "create should succeed") 32 + shared.AssertTrue(t, id > 0, "id should be positive") 33 + shared.AssertEqual(t, id, doc.ID, "document ID should be set") 34 + }) 35 + 36 + t.Run("returns error with cancelled context", func(t *testing.T) { 37 + doc := CreateSampleDocument() 38 + canceledCtx := NewCanceledContext() 39 + 40 + _, err := repo.Create(canceledCtx, doc) 41 + AssertCancelledContext(t, err) 42 + }) 43 + }) 44 + 45 + t.Run("Get", func(t *testing.T) { 46 + t.Run("retrieves existing document", func(t *testing.T) { 47 + doc := CreateSampleDocument() 48 + id, err := repo.Create(ctx, doc) 49 + shared.AssertNoError(t, err, "create should succeed") 50 + 51 + retrieved, err := repo.Get(ctx, id) 52 + shared.AssertNoError(t, err, "get should succeed") 53 + shared.AssertEqual(t, doc.Title, retrieved.Title, "title should match") 54 + shared.AssertEqual(t, doc.Body, retrieved.Body, "body should match") 55 + shared.AssertEqual(t, doc.DocKind, retrieved.DocKind, "doc kind should match") 56 + }) 57 + 58 + t.Run("returns error for non-existent document", func(t *testing.T) { 59 + _, err := repo.Get(ctx, 99999) 60 + shared.AssertError(t, err, "should return error for non-existent document") 61 + shared.AssertContains(t, err.Error(), "not found", "error should mention not found") 62 + }) 63 + 64 + t.Run("returns error with cancelled context", func(t *testing.T) { 65 + canceledCtx := NewCanceledContext() 66 + _, err := repo.Get(canceledCtx, 1) 67 + AssertCancelledContext(t, err) 68 + }) 69 + }) 70 + 71 + t.Run("Delete", func(t *testing.T) { 72 + t.Run("deletes existing document", func(t *testing.T) { 73 + doc := CreateSampleDocument() 74 + id, err := repo.Create(ctx, doc) 75 + shared.AssertNoError(t, err, "create should succeed") 76 + 77 + err = repo.Delete(ctx, id) 78 + shared.AssertNoError(t, err, "delete should succeed") 79 + 80 + _, err = repo.Get(ctx, id) 81 + shared.AssertError(t, err, "get after delete should fail") 82 + }) 83 + 84 + t.Run("returns error for non-existent document", func(t *testing.T) { 85 + err := repo.Delete(ctx, 99999) 86 + shared.AssertError(t, err, "should return error for non-existent document") 87 + }) 88 + 89 + t.Run("returns error with cancelled context", func(t *testing.T) { 90 + canceledCtx := NewCanceledContext() 91 + err := repo.Delete(canceledCtx, 1) 92 + AssertCancelledContext(t, err) 93 + }) 94 + }) 95 + 96 + t.Run("List", func(t *testing.T) { 97 + t.Run("returns all documents", func(t *testing.T) { 98 + db := CreateTestDB(t) 99 + repo := NewDocumentRepository(db) 100 + 101 + doc1 := CreateSampleDocument() 102 + doc1.Title = "First" 103 + doc2 := CreateSampleDocument() 104 + doc2.Title = "Second" 105 + 106 + _, err := repo.Create(ctx, doc1) 107 + 
shared.AssertNoError(t, err, "create doc1 should succeed") 108 + _, err = repo.Create(ctx, doc2) 109 + shared.AssertNoError(t, err, "create doc2 should succeed") 110 + 111 + docs, err := repo.List(ctx) 112 + shared.AssertNoError(t, err, "list should succeed") 113 + shared.AssertEqual(t, 2, len(docs), "should return 2 documents") 114 + }) 115 + 116 + t.Run("returns empty list when no documents exist", func(t *testing.T) { 117 + db := CreateTestDB(t) 118 + repo := NewDocumentRepository(db) 119 + 120 + docs, err := repo.List(ctx) 121 + shared.AssertNoError(t, err, "list should succeed") 122 + shared.AssertEqual(t, 0, len(docs), "should return empty list") 123 + }) 124 + 125 + t.Run("returns error with cancelled context", func(t *testing.T) { 126 + canceledCtx := NewCanceledContext() 127 + _, err := repo.List(canceledCtx) 128 + AssertCancelledContext(t, err) 129 + }) 130 + }) 131 + 132 + t.Run("ListByKind", func(t *testing.T) { 133 + t.Run("filters documents by kind", func(t *testing.T) { 134 + db := CreateTestDB(t) 135 + repo := NewDocumentRepository(db) 136 + 137 + noteDoc := CreateSampleDocument() 138 + noteDoc.DocKind = int64(documents.NoteDoc) 139 + articleDoc := CreateSampleDocument() 140 + articleDoc.DocKind = int64(documents.ArticleDoc) 141 + 142 + _, err := repo.Create(ctx, noteDoc) 143 + shared.AssertNoError(t, err, "create note should succeed") 144 + _, err = repo.Create(ctx, articleDoc) 145 + shared.AssertNoError(t, err, "create article should succeed") 146 + 147 + notes, err := repo.ListByKind(ctx, documents.NoteDoc) 148 + shared.AssertNoError(t, err, "list by kind should succeed") 149 + shared.AssertEqual(t, 1, len(notes), "should return 1 note") 150 + shared.AssertEqual(t, int64(documents.NoteDoc), notes[0].DocKind, "should be note kind") 151 + }) 152 + 153 + t.Run("returns error with cancelled context", func(t *testing.T) { 154 + canceledCtx := NewCanceledContext() 155 + _, err := repo.ListByKind(canceledCtx, documents.NoteDoc) 156 + AssertCancelledContext(t, err) 157 + }) 158 + }) 159 + 160 + t.Run("DeleteAll", func(t *testing.T) { 161 + t.Run("removes all documents", func(t *testing.T) { 162 + db := CreateTestDB(t) 163 + repo := NewDocumentRepository(db) 164 + 165 + _, err := repo.Create(ctx, CreateSampleDocument()) 166 + shared.AssertNoError(t, err, "create should succeed") 167 + _, err = repo.Create(ctx, CreateSampleDocument()) 168 + shared.AssertNoError(t, err, "create should succeed") 169 + 170 + err = repo.DeleteAll(ctx) 171 + shared.AssertNoError(t, err, "delete all should succeed") 172 + 173 + docs, err := repo.List(ctx) 174 + shared.AssertNoError(t, err, "list should succeed") 175 + shared.AssertEqual(t, 0, len(docs), "should have no documents after delete all") 176 + }) 177 + 178 + t.Run("returns error with cancelled context", func(t *testing.T) { 179 + canceledCtx := NewCanceledContext() 180 + err := repo.DeleteAll(canceledCtx) 181 + AssertCancelledContext(t, err) 182 + }) 183 + }) 184 + 185 + t.Run("RebuildFromNotes", func(t *testing.T) { 186 + t.Run("creates documents from notes", func(t *testing.T) { 187 + db := CreateTestDB(t) 188 + repo := NewDocumentRepository(db) 189 + noteRepo := NewNoteRepository(db) 190 + 191 + note1 := CreateSampleNote() 192 + note1.Title = "Note 1" 193 + note2 := CreateSampleNote() 194 + note2.Title = "Note 2" 195 + 196 + _, err := noteRepo.Create(ctx, note1) 197 + shared.AssertNoError(t, err, "create note1 should succeed") 198 + _, err = noteRepo.Create(ctx, note2) 199 + shared.AssertNoError(t, err, "create note2 should succeed") 200 + 
201 + err = repo.RebuildFromNotes(ctx, noteRepo) 202 + shared.AssertNoError(t, err, "rebuild should succeed") 203 + 204 + docs, err := repo.List(ctx) 205 + shared.AssertNoError(t, err, "list should succeed") 206 + shared.AssertEqual(t, 2, len(docs), "should have 2 documents from notes") 207 + }) 208 + 209 + t.Run("returns error with cancelled context", func(t *testing.T) { 210 + db := CreateTestDB(t) 211 + repo := NewDocumentRepository(db) 212 + noteRepo := NewNoteRepository(db) 213 + canceledCtx := NewCanceledContext() 214 + 215 + err := repo.RebuildFromNotes(canceledCtx, noteRepo) 216 + AssertCancelledContext(t, err) 217 + }) 218 + }) 219 + 220 + t.Run("BuildIndex", func(t *testing.T) { 221 + t.Run("creates search index from documents", func(t *testing.T) { 222 + db := CreateTestDB(t) 223 + repo := NewDocumentRepository(db) 224 + 225 + doc1 := CreateSampleDocument() 226 + doc1.Title = "Go Programming" 227 + doc1.Body = "Learn Go language" 228 + doc2 := CreateSampleDocument() 229 + doc2.Title = "Python Guide" 230 + doc2.Body = "Python tutorial" 231 + 232 + _, err := repo.Create(ctx, doc1) 233 + shared.AssertNoError(t, err, "create doc1 should succeed") 234 + _, err = repo.Create(ctx, doc2) 235 + shared.AssertNoError(t, err, "create doc2 should succeed") 236 + 237 + idx, err := repo.BuildIndex(ctx) 238 + shared.AssertNoError(t, err, "build index should succeed") 239 + shared.AssertNotNil(t, idx, "index should not be nil") 240 + shared.AssertEqual(t, 2, idx.NumDocs, "index should contain 2 documents") 241 + }) 242 + 243 + t.Run("handles empty document set", func(t *testing.T) { 244 + db := CreateTestDB(t) 245 + repo := NewDocumentRepository(db) 246 + 247 + idx, err := repo.BuildIndex(ctx) 248 + shared.AssertNoError(t, err, "build index should succeed with empty set") 249 + shared.AssertEqual(t, 0, idx.NumDocs, "index should be empty") 250 + }) 251 + 252 + t.Run("returns error with cancelled context", func(t *testing.T) { 253 + canceledCtx := NewCanceledContext() 254 + _, err := repo.BuildIndex(canceledCtx) 255 + AssertCancelledContext(t, err) 256 + }) 257 + }) 258 + } 259 + 260 + func TestSearchEngine(t *testing.T) { 261 + db := CreateTestDB(t) 262 + docRepo := NewDocumentRepository(db) 263 + ctx := context.Background() 264 + 265 + doc1 := CreateSampleDocument() 266 + doc1.Title = "Go Programming" 267 + doc1.Body = "Learn Go programming language with examples" 268 + doc2 := CreateSampleDocument() 269 + doc2.Title = "Python Tutorial" 270 + doc2.Body = "Python is a versatile programming language" 271 + doc3 := CreateSampleDocument() 272 + doc3.Title = "Go Advanced" 273 + doc3.Body = "Advanced Go concepts and patterns" 274 + 275 + _, err := docRepo.Create(ctx, doc1) 276 + shared.AssertNoError(t, err, "create doc1 should succeed") 277 + _, err = docRepo.Create(ctx, doc2) 278 + shared.AssertNoError(t, err, "create doc2 should succeed") 279 + _, err = docRepo.Create(ctx, doc3) 280 + shared.AssertNoError(t, err, "create doc3 should succeed") 281 + 282 + t.Run("Rebuild", func(t *testing.T) { 283 + t.Run("builds search index", func(t *testing.T) { 284 + engine := NewSearchEngine(docRepo) 285 + err := engine.Rebuild(ctx) 286 + shared.AssertNoError(t, err, "rebuild should succeed") 287 + shared.AssertNotNil(t, engine.index, "index should be set after rebuild") 288 + }) 289 + 290 + t.Run("returns error with cancelled context", func(t *testing.T) { 291 + engine := NewSearchEngine(docRepo) 292 + canceledCtx := NewCanceledContext() 293 + err := engine.Rebuild(canceledCtx) 294 + AssertCancelledContext(t, 
err) 295 + }) 296 + }) 297 + 298 + t.Run("Search", func(t *testing.T) { 299 + t.Run("returns error when index not initialized", func(t *testing.T) { 300 + engine := NewSearchEngine(docRepo) 301 + _, err := engine.Search(ctx, "go", 10) 302 + shared.AssertError(t, err, "should error when index not initialized") 303 + shared.AssertContains(t, err.Error(), "not initialized", "error should mention not initialized") 304 + }) 305 + 306 + t.Run("finds matching documents", func(t *testing.T) { 307 + engine := NewSearchEngine(docRepo) 308 + err := engine.Rebuild(ctx) 309 + shared.AssertNoError(t, err, "rebuild should succeed") 310 + 311 + docs, err := engine.Search(ctx, "go", 10) 312 + shared.AssertNoError(t, err, "search should succeed") 313 + shared.AssertTrue(t, len(docs) >= 2, "should find at least 2 documents with 'go'") 314 + }) 315 + 316 + t.Run("returns empty results for non-matching query", func(t *testing.T) { 317 + engine := NewSearchEngine(docRepo) 318 + err := engine.Rebuild(ctx) 319 + shared.AssertNoError(t, err, "rebuild should succeed") 320 + 321 + docs, err := engine.Search(ctx, "rust", 10) 322 + shared.AssertNoError(t, err, "search should succeed") 323 + shared.AssertEqual(t, 0, len(docs), "should return no results for non-matching query") 324 + }) 325 + 326 + t.Run("respects limit parameter", func(t *testing.T) { 327 + engine := NewSearchEngine(docRepo) 328 + err := engine.Rebuild(ctx) 329 + shared.AssertNoError(t, err, "rebuild should succeed") 330 + 331 + docs, err := engine.Search(ctx, "programming", 1) 332 + shared.AssertNoError(t, err, "search should succeed") 333 + shared.AssertTrue(t, len(docs) <= 1, "should respect limit parameter") 334 + }) 335 + 336 + t.Run("returns error with cancelled context", func(t *testing.T) { 337 + engine := NewSearchEngine(docRepo) 338 + err := engine.Rebuild(ctx) 339 + shared.AssertNoError(t, err, "rebuild should succeed") 340 + 341 + canceledCtx := NewCanceledContext() 342 + _, err = engine.Search(canceledCtx, "go", 10) 343 + AssertCancelledContext(t, err) 344 + }) 345 + }) 346 + 347 + t.Run("SearchWithScores", func(t *testing.T) { 348 + t.Run("returns results with scores", func(t *testing.T) { 349 + engine := NewSearchEngine(docRepo) 350 + err := engine.Rebuild(ctx) 351 + shared.AssertNoError(t, err, "rebuild should succeed") 352 + 353 + results, docs, err := engine.SearchWithScores(ctx, "go", 10) 354 + shared.AssertNoError(t, err, "search should succeed") 355 + shared.AssertEqual(t, len(results), len(docs), "results and docs should have same length") 356 + shared.AssertTrue(t, len(results) >= 2, "should find at least 2 results") 357 + 358 + for _, result := range results { 359 + shared.AssertTrue(t, result.Score > 0, "score should be positive") 360 + } 361 + }) 362 + 363 + t.Run("returns error when index not initialized", func(t *testing.T) { 364 + engine := NewSearchEngine(docRepo) 365 + _, _, err := engine.SearchWithScores(ctx, "go", 10) 366 + shared.AssertError(t, err, "should error when index not initialized") 367 + }) 368 + 369 + t.Run("returns error with cancelled context", func(t *testing.T) { 370 + engine := NewSearchEngine(docRepo) 371 + err := engine.Rebuild(ctx) 372 + shared.AssertNoError(t, err, "rebuild should succeed") 373 + 374 + canceledCtx := NewCanceledContext() 375 + _, _, err = engine.SearchWithScores(canceledCtx, "go", 10) 376 + AssertCancelledContext(t, err) 377 + }) 378 + }) 379 + }
+10
internal/repo/queries.go
··· 40 40 queryTasksList = "SELECT " + taskColumns + " FROM tasks" 41 41 ) 42 42 43 + const ( 44 + documentColumns = "id, title, body, created_at, doc_kind" 45 + queryDocumentByID = "SELECT " + documentColumns + " FROM documents WHERE id = ?" 46 + queryDocumentInsert = `INSERT INTO documents (title, body, created_at, doc_kind) VALUES (?, ?, ?, ?)` 47 + queryDocumentDelete = "DELETE FROM documents WHERE id = ?" 48 + queryDocumentsList = "SELECT " + documentColumns + " FROM documents ORDER BY created_at DESC" 49 + queryDocumentsByKind = "SELECT " + documentColumns + " FROM documents WHERE doc_kind = ? ORDER BY created_at DESC" 50 + queryDocumentsDeleteAll = "DELETE FROM documents" 51 + ) 52 + 43 53 type scanner interface { 44 54 Scan(dest ...any) error 45 55 }
+2
internal/repo/repo.go
··· 20 20 Notes *NoteRepository 21 21 TimeEntries *TimeEntryRepository 22 22 Articles *ArticleRepository 23 + Documents *DocumentRepository 23 24 } 24 25 25 26 // NewRepositories creates a new set of [Repositories] ··· 32 33 Notes: NewNoteRepository(db), 33 34 TimeEntries: NewTimeEntryRepository(db), 34 35 Articles: NewArticleRepository(db), 36 + Documents: NewDocumentRepository(db), 35 37 } 36 38 } 37 39
+3
internal/store/sql/migrations/0010_create_documents_table_down.sql
··· 1 + DROP INDEX IF EXISTS idx_documents_created_at; 2 + DROP INDEX IF EXISTS idx_documents_doc_kind; 3 + DROP TABLE IF EXISTS documents;
+10
internal/store/sql/migrations/0010_create_documents_table_up.sql
··· 1 + CREATE TABLE IF NOT EXISTS documents ( 2 + id INTEGER PRIMARY KEY AUTOINCREMENT, 3 + title TEXT NOT NULL, 4 + body TEXT NOT NULL, 5 + created_at DATETIME NOT NULL, 6 + doc_kind INTEGER NOT NULL 7 + ); 8 + 9 + CREATE INDEX IF NOT EXISTS idx_documents_doc_kind ON documents(doc_kind); 10 + CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at);