rss email digests over ssh because you're a cool kid · herald.dunkirk.sh
go rss rss-reader ssh charm

feat: fix query and cleanup in db

dunkirk.sh faeb5870 1e7cd87d

verified
Changed files: +188 -33
+21 -23
fix.md
````diff
···
 - ✅ #14: No Rate Limiting (SSH, SCP, web, email)
 - ✅ #15: Token Generation (verify crypto/rand usage)

-## **P1 - High (Performance & Reliability)**
+## **P1 - High (Performance & Reliability)** ✅ COMPLETE

-- #8: N+1 Query Problem (batch operations)
-- #26: No Cleanup of Old seen_items (6 month cleanup job)
-- #23: Missing Graceful Shutdown for Scheduler (panic recovery)
+- ✅ #8: N+1 Query Problem (batch operations)
+- ✅ #26: No Cleanup of Old seen_items (6 month cleanup job)
+- ✅ #23: Missing Graceful Shutdown for Scheduler (panic recovery)

 ## **P2 - Medium (Code Quality & UX)**

···
 ## **Performance Issues**

-### 8. **N+1 Query Problem** 🐌
+### 8. **N+1 Query Problem** ✅

 **Location:** Multiple locations

 - `web/handlers.go:99-103` - Gets feeds for each config in a loop
 - `scheduler/scheduler.go` - Checks each item individually for seen status

-**Fix:** Batch operations:
-
-```go
-// Instead of checking each item individually
-seenGuids, err := s.store.GetSeenGUIDs(ctx, feedID, itemGUIDs)
-```
+**Fixed:**
+- Added `GetSeenGUIDs()` batch method in `store/items.go` to check multiple items at once
+- Added `GetFeedsByConfigs()` batch method in `store/feeds.go` to fetch feeds for multiple configs
+- Updated `scheduler/scheduler.go:collectNewItems()` to use batch GUID checking
+- Updated `web/handlers.go` dashboard handler to batch fetch all feeds

 ### 9. **No Prepared Statements** 📝

···
 Some errors are `logger.Error`, some are `logger.Warn`. For example, feed fetch errors are `Warn` (line 89 of scheduler.go) but other errors are `Error`. Establish consistent criteria.

-### 23. **Missing Graceful Shutdown for Scheduler** 🛑
+### 23. **Missing Graceful Shutdown for Scheduler** ✅

 **Location:** `main.go:194-197`

-The scheduler runs in a goroutine with errgroup, but `Start()` only returns on context cancellation. If scheduler panics, errgroup won't capture it.
-
-**Fix:** Add defer recover in scheduler or use errgroup.Go properly.
+**Fixed:**
+- Added panic recovery with defer in `scheduler/scheduler.go:tick()`
+- Added panic recovery wrapper in `main.go` scheduler goroutine
+- Added panic recovery in cleanup job

 ---

···
 You log `"email sent"` but don't verify SMTP actually accepted it (some SMTP servers queue and fail later).

-### 26. **No Cleanup of Old seen_items** 🧹
-
-The `seen_items` table will grow indefinitely. With the 3-month filter, items older than 3 months can be safely deleted.
+### 26. **No Cleanup of Old seen_items** ✅

-**Fix:** Add periodic cleanup job:
-
-```go
-DELETE FROM seen_items WHERE seen_at < datetime('now', '-6 months')
-```
+**Fixed:**
+- Added `CleanupOldSeenItems()` method in `store/items.go` to delete items older than specified duration
+- Added cleanup ticker in `scheduler/scheduler.go:Start()` that runs every 24 hours
+- Cleanup runs on startup and then daily, removing items older than 6 months
+- Added logging for cleanup operations showing number of items deleted

 ### 27. **No Feed Validation on Upload** 🔍

````
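For reference, the caller-side shape of the #8 fix described above, with identifiers taken from the scheduler and store diffs below (an illustration of the pattern, not additional committed code): one query fetches every already-seen GUID for the feed, and membership becomes a map lookup instead of a per-item round trip.

```go
// Batch check: one query for the whole feed instead of one IsItemSeen call per item.
seenSet, err := s.store.GetSeenGUIDs(ctx, result.FeedID, guids)
if err != nil {
	s.logger.Warn("failed to check seen items", "err", err)
	continue
}
for _, item := range result.Items {
	if !seenSet[item.GUID] { // map lookup, no extra query
		newItems = append(newItems, email.FeedItem{Title: item.Title, Link: item.Link})
	}
}
```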
+5
main.go
```diff
···
 	})

 	g.Go(func() error {
+		defer func() {
+			if r := recover(); r != nil {
+				logger.Error("scheduler panic", "panic", r)
+			}
+		}()
 		sched.Start(ctx)
 		return nil
 	})
```
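A note on the design choice in this hunk: because the deferred recover swallows the panic, the closure still returns nil, so the errgroup keeps the other goroutines in the group running after a scheduler panic. If a panic should instead shut the whole service down, a variant could surface it as an error so `Wait()` sees it; a minimal sketch, assuming the group comes from `errgroup.WithContext` (hypothetical, not part of this commit):

```go
g.Go(func() (err error) {
	defer func() {
		if r := recover(); r != nil {
			logger.Error("scheduler panic", "panic", r)
			// Propagate the panic as an error so the errgroup cancels the
			// context and Wait() reports the failure.
			err = fmt.Errorf("scheduler panic: %v", r)
		}
	}()
	sched.Start(ctx)
	return nil
})
```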
+49 -5
scheduler/scheduler.go
```diff
···
 	ticker := time.NewTicker(s.interval)
 	defer ticker.Stop()

+	// Cleanup ticker runs every 24 hours
+	cleanupTicker := time.NewTicker(24 * time.Hour)
+	defer cleanupTicker.Stop()
+
 	s.logger.Info("scheduler started", "interval", s.interval)

+	// Run cleanup on start
+	s.cleanupOldSeenItems(ctx)
+
 	for {
 		select {
 		case <-ctx.Done():
···
 			return
 		case <-ticker.C:
 			s.tick(ctx)
+		case <-cleanupTicker.C:
+			s.cleanupOldSeenItems(ctx)
 		}
 	}
 }

+func (s *Scheduler) cleanupOldSeenItems(ctx context.Context) {
+	defer func() {
+		if r := recover(); r != nil {
+			s.logger.Error("panic during cleanup", "panic", r)
+		}
+	}()
+
+	// Delete items older than 6 months
+	deleted, err := s.store.CleanupOldSeenItems(ctx, 6*30*24*time.Hour)
+	if err != nil {
+		s.logger.Error("failed to cleanup old seen items", "err", err)
+		return
+	}
+	if deleted > 0 {
+		s.logger.Info("cleaned up old seen items", "deleted", deleted)
+	}
+}
+
 func (s *Scheduler) tick(ctx context.Context) {
+	defer func() {
+		if r := recover(); r != nil {
+			s.logger.Error("panic during tick", "panic", r)
+		}
+	}()
+
 	now := time.Now()
 	configs, err := s.store.GetDueConfigs(ctx, now)
 	if err != nil {
···
 			continue
 		}

-		var newItems []email.FeedItem
+		// Collect all GUIDs for this feed to batch check
+		var guids []string
 		for _, item := range result.Items {
 			if !item.Published.IsZero() && item.Published.Before(threeMonthsAgo) {
 				continue
 			}
+			guids = append(guids, item.GUID)
+		}

-			seen, err := s.store.IsItemSeen(ctx, result.FeedID, item.GUID)
-			if err != nil {
-				s.logger.Warn("failed to check if item seen", "err", err)
+		// Batch check which items have been seen
+		seenSet, err := s.store.GetSeenGUIDs(ctx, result.FeedID, guids)
+		if err != nil {
+			s.logger.Warn("failed to check seen items", "err", err)
+			continue
+		}
+
+		// Collect new items
+		var newItems []email.FeedItem
+		for _, item := range result.Items {
+			if !item.Published.IsZero() && item.Published.Before(threeMonthsAgo) {
 				continue
 			}

-			if !seen {
+			if !seenSet[item.GUID] {
 				newItems = append(newItems, email.FeedItem{
 					Title: item.Title,
 					Link:  item.Link,
```
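One detail on the cleanup cutoff: `6*30*24*time.Hour` is a fixed 180 days rather than six calendar months. If calendar accuracy ever mattered, the cutoff could be computed with `time.AddDate` instead of a duration; a hypothetical sketch (this would mean changing `CleanupOldSeenItems` to take a `time.Time` cutoff, which the commit does not do):

```go
// Hypothetical calendar-aware cutoff; the committed code passes a 180-day duration.
cutoff := time.Now().AddDate(0, -6, 0) // exactly six calendar months ago
```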
+40
store/feeds.go
```diff
···
 	return feeds, rows.Err()
 }

+// GetFeedsByConfigs returns a map of configID to feeds for multiple configs in a single query
+func (db *DB) GetFeedsByConfigs(ctx context.Context, configIDs []int64) (map[int64][]*Feed, error) {
+	if len(configIDs) == 0 {
+		return make(map[int64][]*Feed), nil
+	}
+
+	// Build the query with the appropriate number of placeholders
+	args := make([]interface{}, len(configIDs))
+	placeholders := "?"
+	for i := 0; i < len(configIDs)-1; i++ {
+		placeholders += ",?"
+	}
+	for i, id := range configIDs {
+		args[i] = id
+	}
+
+	query := fmt.Sprintf(
+		`SELECT id, config_id, url, name, last_fetched, etag, last_modified
+		FROM feeds WHERE config_id IN (%s) ORDER BY config_id, id`,
+		placeholders,
+	)
+
+	rows, err := db.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, fmt.Errorf("query feeds: %w", err)
+	}
+	defer rows.Close()
+
+	feedMap := make(map[int64][]*Feed)
+	for rows.Next() {
+		var f Feed
+		if err := rows.Scan(&f.ID, &f.ConfigID, &f.URL, &f.Name, &f.LastFetched, &f.ETag, &f.LastModified); err != nil {
+			return nil, fmt.Errorf("scan feed: %w", err)
+		}
+		feedMap[f.ConfigID] = append(feedMap[f.ConfigID], &f)
+	}
+
+	return feedMap, rows.Err()
+}
+
 func (db *DB) UpdateFeedFetched(ctx context.Context, feedID int64, etag, lastModified string) error {
 	var etagVal, lmVal sql.NullString
 	if etag != "" {
```
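An aside on the `IN (%s)` expansion, not something this commit needs to address: SQLite limits the number of bound parameters per statement (999 in many older builds), so an extremely large `configIDs` slice would need to be chunked across several queries. A hypothetical helper for that case:

```go
// chunkIDs splits ids into groups of at most size elements so each IN (...)
// query stays under SQLite's bound-parameter limit (hypothetical, not in the commit).
func chunkIDs(ids []int64, size int) [][]int64 {
	var chunks [][]int64
	for len(ids) > size {
		chunks = append(chunks, ids[:size])
		ids = ids[size:]
	}
	return append(chunks, ids)
}
```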
+60
store/items.go
```diff
···
 	}
 	return items, rows.Err()
 }
+
+// GetSeenGUIDs returns a set of GUIDs that have been seen for a given feed
+func (db *DB) GetSeenGUIDs(ctx context.Context, feedID int64, guids []string) (map[string]bool, error) {
+	if len(guids) == 0 {
+		return make(map[string]bool), nil
+	}
+
+	// Build the query with the appropriate number of placeholders
+	args := make([]interface{}, 0, len(guids)+1)
+	args = append(args, feedID)
+
+	placeholders := "?"
+	for i := 0; i < len(guids)-1; i++ {
+		placeholders += ",?"
+	}
+	for _, guid := range guids {
+		args = append(args, guid)
+	}
+
+	query := fmt.Sprintf(
+		`SELECT guid FROM seen_items WHERE feed_id = ? AND guid IN (%s)`,
+		placeholders,
+	)
+
+	rows, err := db.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, fmt.Errorf("query seen guids: %w", err)
+	}
+	defer rows.Close()
+
+	seenSet := make(map[string]bool)
+	for rows.Next() {
+		var guid string
+		if err := rows.Scan(&guid); err != nil {
+			return nil, fmt.Errorf("scan guid: %w", err)
+		}
+		seenSet[guid] = true
+	}
+
+	return seenSet, rows.Err()
+}
+
+// CleanupOldSeenItems deletes seen items older than the specified duration
+func (db *DB) CleanupOldSeenItems(ctx context.Context, olderThan time.Duration) (int64, error) {
+	cutoff := time.Now().Add(-olderThan)
+	result, err := db.ExecContext(ctx,
+		`DELETE FROM seen_items WHERE seen_at < ?`,
+		cutoff,
+	)
+	if err != nil {
+		return 0, fmt.Errorf("cleanup old seen items: %w", err)
+	}
+
+	deleted, err := result.RowsAffected()
+	if err != nil {
+		return 0, fmt.Errorf("get rows affected: %w", err)
+	}
+
+	return deleted, nil
+}
```
+13 -5
web/handlers.go
```diff
···
 		return
 	}

+	// Batch fetch all feeds for all configs
+	configIDs := make([]int64, len(configs))
+	for i, cfg := range configs {
+		configIDs[i] = cfg.ID
+	}
+	feedsByConfig, err := s.store.GetFeedsByConfigs(ctx, configIDs)
+	if err != nil {
+		s.logger.Error("get feeds by configs", "err", err)
+		http.Error(w, "Internal Server Error", http.StatusInternalServerError)
+		return
+	}
+
 	var configInfos []configInfo
 	var earliestNextRun time.Time
 	hasAnyActive := false

 	for _, cfg := range configs {
-		feeds, err := s.store.GetFeedsByConfig(ctx, cfg.ID)
-		if err != nil {
-			s.logger.Error("get feeds", "err", err)
-			continue
-		}
+		feeds := feedsByConfig[cfg.ID]

 		isActive := cfg.NextRun.Valid
 		if isActive {
```
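Why dropping the per-config error branch is safe for configs that have no feeds: a missing key in `feedsByConfig` yields a nil slice, and Go treats a nil slice as empty. A standalone illustration of the semantics this relies on (example code, not from the repository):

```go
package main

import "fmt"

func main() {
	var feedsByConfig map[int64][]string // stand-in for the map[int64][]*Feed returned by the store

	feeds := feedsByConfig[42] // missing key: zero value, i.e. a nil slice, no panic
	fmt.Println(len(feeds))    // prints 0
	for range feeds {          // loop body never runs, so the config simply renders with no feeds
		fmt.Println("unreachable")
	}
}
```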