Hacker News alerts in Slack (incessant pings if you make the front page)

Compare changes

Choose any two refs to compare.

Changed files
+257 -3
src
features
services
libs
+7 -1
src/features/services/check_hn.ts
··· 25 25 } from "../../libs/hackernews"; 26 26 import { addDays } from "../../libs/time"; 27 27 import type { AnyMessageBlock } from "slack-edge"; 28 - import { sqlite } from "../../libs/db"; 28 + import { optimizeLeaderboardSnapshots, sqlite } from "../../libs/db"; 29 29 30 30 // Constants 31 31 const TOP_STORIES_LIMIT = 30; // Front page is considered the top 30 stories ··· 699 699 .delete(leaderboardSnapshots) 700 700 .where(lt(leaderboardSnapshots.expiresAt, currentTime)); 701 701 } 702 + 703 + // Optimize leaderboard snapshots by removing redundant ones with batching 704 + // Use a batch size of 100 for better performance without overwhelming the database 705 + // Use conservative mode (true) to ensure we don't remove important data during sharp changes 706 + const { optimizeLeaderboardSnapshots } = await import("../../libs/db"); 707 + await optimizeLeaderboardSnapshots(100, true); 702 708 } catch (error) { 703 709 console.error("Error cleaning up expired data:", error); 704 710 Sentry.captureException(error);
+250 -2
src/libs/db.ts
import { drizzle } from "drizzle-orm/bun-sqlite";
import { Database } from "bun:sqlite";
import * as schema from "./schema";
import { eq, and, notInArray, count } from "drizzle-orm";
import * as Sentry from "@sentry/bun";

// Row shape read from leaderboard_snapshots by the optimizer
interface Snapshot {
  id: number;
  timestamp: number;
  position: number;
  score: number;
}

// Aggregate row: a story id and how many snapshots it currently has
interface StoryCount {
  story_id: number;
  snapshot_count: number;
}

// Result of deciding which snapshots of a single story must survive
interface RetentionPlan {
  keepIds: Set<number>;
  hasSharpChanges: boolean;
  maxPositionJump: number;
  maxScoreJump: number;
}

// Use environment variable for the database path in production
const dbPath = process.env.DATABASE_PATH || "./local.db";

// ··· (unchanged lines elided by the compare view) ···

const sqlite = new Database(dbPath, {
  // Use WAL mode for better concurrency
  readonly: false,
  create: true,
});

// Set a longer busy timeout to reduce "database is locked" errors

// ··· (unchanged lines elided by the compare view) ···

// Create a Drizzle instance with the database and schema
export const db = drizzle(sqlite, { schema });

/**
 * Decides which snapshots of one story to keep. Pure function: no database access.
 *
 * Always kept: the first and last snapshot, every snapshot whose position or
 * score differs from the last recorded change, and the snapshot immediately
 * before each such change. In conservative mode every Nth snapshot is kept as
 * well (N = 2 when the series has sharp changes, otherwise 4).
 *
 * @param snapshots - Snapshots of a single story, ordered by timestamp
 * @param conservative - If true, also keep snapshots at regular intervals
 * @returns The ids to keep plus the volatility figures used for logging
 */
function planSnapshotRetention(
  snapshots: Snapshot[],
  conservative: boolean,
): RetentionPlan {
  const keepIds = new Set<number>();
  // A Set de-duplicates ids; the typeof guard skips rows without a numeric id
  // (including id 0, which the previous truthiness checks silently dropped).
  const keep = (row: Snapshot) => {
    if (typeof row.id === "number") keepIds.add(row.id);
  };

  let significantChanges = 0;
  let maxPositionJump = 0;
  let maxScoreJump = 0;

  const first = snapshots[0];
  const last = snapshots[snapshots.length - 1];
  if (first === undefined || last === undefined) {
    return { keepIds, hasSharpChanges: false, maxPositionJump, maxScoreJump };
  }

  // Always keep first and last snapshots
  keep(first);
  keep(last);

  // First pass - measure volatility between consecutive snapshots
  if (conservative) {
    let previous = first;
    for (const current of snapshots.slice(1)) {
      const positionDiff = Math.abs(
        (current.position ?? 0) - (previous.position ?? 0),
      );
      const scoreDiff = Math.abs((current.score ?? 0) - (previous.score ?? 0));

      maxPositionJump = Math.max(maxPositionJump, positionDiff);
      maxScoreJump = Math.max(maxScoreJump, scoreDiff);

      // Significant: position jump of 3+ or score change of at least
      // max(5, 10% of the previous score)
      if (
        positionDiff >= 3 ||
        scoreDiff >= Math.max(5, (previous.score ?? 0) * 0.1)
      ) {
        significantChanges++;
      }
      previous = current;
    }
  }

  // Keep more points if the story has sharp changes
  const hasSharpChanges =
    significantChanges >= 2 || maxPositionJump >= 5 || maxScoreJump >= 20;
  const keepEveryNthPoint = hasSharpChanges ? 2 : 4;

  // Second pass - interior snapshots only (first and last are already kept)
  let lastPosition = first.position;
  let lastScore = first.score;
  for (let i = 1; i < snapshots.length - 1; i++) {
    const snapshot = snapshots[i];
    if (
      snapshot === undefined ||
      typeof snapshot.position !== "number" ||
      typeof snapshot.score !== "number"
    ) {
      continue;
    }

    const changed =
      snapshot.position !== lastPosition || snapshot.score !== lastScore;
    const onInterval = conservative && i % keepEveryNthPoint === 0;
    if (!changed && !onInterval) continue;

    if (changed) {
      // Keep last snapshot before the change so the step stays visible
      const before = snapshots[i - 1];
      if (before !== undefined) keep(before);

      // Update the baseline even when this index is also an interval point.
      // Previously interval points skipped this, which dropped the pre-change
      // snapshot and kept a redundant follower instead.
      lastPosition = snapshot.position;
      lastScore = snapshot.score;
    }

    keep(snapshot);
  }

  return { keepIds, hasSharpChanges, maxPositionJump, maxScoreJump };
}

/**
 * Optimizes leaderboard snapshots by removing redundant entries.
 *
 * Processes at most `batchSize` stories per call, choosing the stories that
 * have more than 3 snapshots, most snapshots first. For each story the rows
 * to keep are chosen by `planSnapshotRetention`, and everything else for that
 * story is deleted. The keep-list and the delete run in one transaction per
 * story, so a failure leaves that story untouched.
 *
 * Errors are logged and reported to Sentry, never thrown. The function holds
 * no `await`; it remains `async` so existing callers can keep awaiting it.
 *
 * @param {number} batchSize - Maximum number of stories to process in this call (default: 50)
 * @param {boolean} conservative - If true, uses more conservative rules to keep snapshots (default: true)
 * @returns {Promise<number>} Eligible stories not processed in this call;
 *   0 when nothing was eligible or when the run failed as a whole
 */
async function optimizeLeaderboardSnapshots(batchSize = 50, conservative = true) {
  try {
    console.log("Starting leaderboard snapshots optimization...");
    const startTime = Date.now();

    // Bound as a parameter below; fall back to the default for non-finite input
    const limit = Number.isFinite(batchSize)
      ? Math.max(1, Math.floor(batchSize))
      : 50;

    // Get count of eligible stories (more than 3 snapshots)
    const countRow = sqlite
      .query(
        "SELECT COUNT(*) as count FROM (SELECT story_id FROM leaderboard_snapshots GROUP BY story_id HAVING COUNT(*) > 3)",
      )
      .get() as { count: number } | null;
    const storyCount = countRow?.count ?? 0;

    if (storyCount === 0) {
      console.log("No stories with snapshots to optimize");
      return 0;
    }

    console.log(
      `Found ${storyCount} stories with leaderboard snapshots to analyze`,
    );
    let totalRedundantSnapshots = 0;
    let processedStories = 0;

    // NOTE(review): ordering by snapshot count means a story that still has
    // many snapshots after pruning can be selected again on every run, ahead
    // of stories that were never processed - confirm this is intended.
    const candidateStories = sqlite
      .query(
        `SELECT story_id, COUNT(*) as snapshot_count
         FROM leaderboard_snapshots
         GROUP BY story_id
         HAVING COUNT(*) > 3
         ORDER BY snapshot_count DESC
         LIMIT ?`,
      )
      .all(limit) as StoryCount[];

    // Create temporary table for IDs to keep
    sqlite.exec(`
      DROP TABLE IF EXISTS temp_snapshots_to_keep;
      CREATE TEMPORARY TABLE temp_snapshots_to_keep (
        id INTEGER NOT NULL
      );
    `);

    try {
      // Prepared once and reused for every story
      const selectSnapshots = sqlite.prepare(
        `SELECT id, timestamp, position, score
         FROM leaderboard_snapshots
         WHERE story_id = ?
         ORDER BY timestamp`,
      );
      const insertKeep = sqlite.prepare(
        "INSERT INTO temp_snapshots_to_keep (id) VALUES (?)",
      );
      const deleteRedundant = sqlite.prepare(
        `DELETE FROM leaderboard_snapshots
         WHERE story_id = ?
           AND NOT EXISTS (
             SELECT 1 FROM temp_snapshots_to_keep
             WHERE temp_snapshots_to_keep.id = leaderboard_snapshots.id
           )`,
      );

      // Refill the keep-list and delete in a single transaction
      const pruneStory = sqlite.transaction(
        (storyId: number, keepIds: Set<number>) => {
          sqlite.exec("DELETE FROM temp_snapshots_to_keep");
          for (const id of keepIds) {
            insertKeep.run(id);
          }
          return deleteRedundant.run(storyId).changes;
        },
      );

      for (const story of candidateStories) {
        const storyId = story.story_id;
        if (storyId == null) continue;

        try {
          const snapshots = selectSnapshots.all(storyId) as Snapshot[];

          if (!snapshots || snapshots.length <= 3) {
            console.log(`Skipping story ${storyId}: Only ${snapshots?.length || 0} snapshots (minimum 4 required)`);
            continue;
          }

          const plan = planSnapshotRetention(snapshots, conservative);
          const deletedCount = pruneStory(storyId, plan.keepIds) as number;

          totalRedundantSnapshots += deletedCount;
          processedStories++;

          // Log more details to help debug issues
          const keptCount = snapshots.length - deletedCount;
          const keepPercent = Math.round((keptCount / snapshots.length) * 100);

          console.log(
            `Story ${storyId}: ${keptCount}/${snapshots.length} snapshots kept (${keepPercent}%)${plan.hasSharpChanges ? ' - SHARP CHANGES DETECTED' : ''} - Max jumps: pos=${plan.maxPositionJump}, score=${plan.maxScoreJump}`
          );

          if (processedStories % 10 === 0) {
            console.log(
              `Processed ${processedStories}/${storyCount} stories, removed ${totalRedundantSnapshots} redundant snapshots so far`,
            );
          }
        } catch (error) {
          // One failing story must not abort the rest of the batch
          console.error(
            `Error optimizing snapshots for story ${storyId}:`,
            error,
          );
          Sentry.captureException(error);
        }
      }
    } finally {
      // Clean up temporary table even if the batch aborted
      sqlite.exec("DROP TABLE IF EXISTS temp_snapshots_to_keep");
    }

    const duration = (Date.now() - startTime) / 1000;
    console.log(
      `Leaderboard optimization complete: processed ${processedStories}/${storyCount} stories, removed ${totalRedundantSnapshots} redundant snapshots in ${duration.toFixed(2)}s`,
    );

    // If there are more stories to process, return how many are left
    return storyCount - processedStories;
  } catch (error) {
    console.error("Error during leaderboard snapshots optimization:", error);
    Sentry.captureException(error);
    return 0;
  }
}

// Export the sqlite instance and schema for use in other files
export { sqlite, schema, optimizeLeaderboardSnapshots };