Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol. wisp.place

add backfill

Changed files
+177 -2
hosting-service
+3 -2
hosting-service/package.json
··· 3 3 "version": "1.0.0", 4 4 "type": "module", 5 5 "scripts": { 6 - "dev": "tsx watch src/index.ts", 6 + "dev": "tsx --env-file=.env watch src/index.ts", 7 7 "build": "tsc", 8 - "start": "tsx src/index.ts" 8 + "start": "tsx --env-file=.env src/index.ts", 9 + "backfill": "tsx --env-file=.env src/index.ts --backfill" 9 10 }, 10 11 "dependencies": { 11 12 "@atproto/api": "^0.17.4",
+19
hosting-service/src/index.ts
··· 3 3 import { FirehoseWorker } from './lib/firehose'; 4 4 import { logger } from './lib/observability'; 5 5 import { mkdirSync, existsSync } from 'fs'; 6 + import { backfillCache } from './lib/backfill'; 6 7 7 8 const PORT = process.env.PORT ? parseInt(process.env.PORT) : 3001; 8 9 const CACHE_DIR = process.env.CACHE_DIR || './cache/sites'; 10 + 11 + // Parse CLI arguments 12 + const args = process.argv.slice(2); 13 + const hasBackfillFlag = args.includes('--backfill'); 14 + const backfillOnStartup = hasBackfillFlag || process.env.BACKFILL_ON_STARTUP === 'true'; 9 15 10 16 // Ensure cache directory exists 11 17 if (!existsSync(CACHE_DIR)) { ··· 19 25 }); 20 26 21 27 firehose.start(); 28 + 29 + // Run backfill if requested 30 + if (backfillOnStartup) { 31 + console.log('🔄 Backfill requested, starting cache backfill...'); 32 + backfillCache({ 33 + skipExisting: true, 34 + concurrency: 3, 35 + }).then((stats) => { 36 + console.log('✅ Cache backfill completed'); 37 + }).catch((err) => { 38 + console.error('❌ Cache backfill error:', err); 39 + }); 40 + } 22 41 23 42 // Add health check endpoint 24 43 app.get('/health', (c) => {
+136
hosting-service/src/lib/backfill.ts
··· 1 + import { getAllSites } from './db'; 2 + import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 3 + import { logger } from './observability'; 4 + 5 + export interface BackfillOptions { 6 + skipExisting?: boolean; // Skip sites already in cache 7 + concurrency?: number; // Number of sites to cache concurrently 8 + maxSites?: number; // Maximum number of sites to backfill (for testing) 9 + } 10 + 11 + export interface BackfillStats { 12 + total: number; 13 + cached: number; 14 + skipped: number; 15 + failed: number; 16 + duration: number; 17 + } 18 + 19 + /** 20 + * Backfill all sites from the database into the local cache 21 + */ 22 + export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> { 23 + const { 24 + skipExisting = true, 25 + concurrency = 3, 26 + maxSites, 27 + } = options; 28 + 29 + const startTime = Date.now(); 30 + const stats: BackfillStats = { 31 + total: 0, 32 + cached: 0, 33 + skipped: 0, 34 + failed: 0, 35 + duration: 0, 36 + }; 37 + 38 + logger.info('Starting cache backfill', { skipExisting, concurrency, maxSites }); 39 + console.log(` 40 + ╔══════════════════════════════════════════╗ 41 + ║ CACHE BACKFILL STARTING ║ 42 + ╚══════════════════════════════════════════╝ 43 + `); 44 + 45 + try { 46 + // Get all sites from database 47 + let sites = await getAllSites(); 48 + stats.total = sites.length; 49 + 50 + logger.info(`Found ${sites.length} sites in database`); 51 + console.log(`📊 Found ${sites.length} sites in database`); 52 + 53 + // Limit if specified 54 + if (maxSites && maxSites > 0) { 55 + sites = sites.slice(0, maxSites); 56 + console.log(`⚙️ Limited to ${maxSites} sites for backfill`); 57 + } 58 + 59 + // Process sites in batches 60 + const batches: typeof sites[] = []; 61 + for (let i = 0; i < sites.length; i += concurrency) { 62 + batches.push(sites.slice(i, i + concurrency)); 63 + } 64 + 65 + let processed = 0; 66 + for (const batch of batches) { 67 + await Promise.all( 68 + batch.map(async (site) => { 69 + try { 70 + // Check if already cached 71 + if (skipExisting && isCached(site.did, site.rkey)) { 72 + stats.skipped++; 73 + processed++; 74 + logger.debug(`Skipping already cached site`, { did: site.did, rkey: site.rkey }); 75 + console.log(`⏭️ [${processed}/${sites.length}] Skipped (cached): ${site.display_name || site.rkey}`); 76 + return; 77 + } 78 + 79 + // Fetch site record 80 + const siteData = await fetchSiteRecord(site.did, site.rkey); 81 + if (!siteData) { 82 + stats.failed++; 83 + processed++; 84 + logger.error('Site record not found during backfill', null, { did: site.did, rkey: site.rkey }); 85 + console.log(`❌ [${processed}/${sites.length}] Failed (not found): ${site.display_name || site.rkey}`); 86 + return; 87 + } 88 + 89 + // Get PDS endpoint 90 + const pdsEndpoint = await getPdsForDid(site.did); 91 + if (!pdsEndpoint) { 92 + stats.failed++; 93 + processed++; 94 + logger.error('PDS not found during backfill', null, { did: site.did }); 95 + console.log(`❌ [${processed}/${sites.length}] Failed (no PDS): ${site.display_name || site.rkey}`); 96 + return; 97 + } 98 + 99 + // Download and cache site 100 + await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid); 101 + stats.cached++; 102 + processed++; 103 + logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey }); 104 + console.log(`✅ [${processed}/${sites.length}] Cached: ${site.display_name || site.rkey}`); 105 + } catch (err) { 106 + stats.failed++; 107 + processed++; 108 + logger.error('Failed to cache site during backfill', err, { did: site.did, rkey: site.rkey }); 109 + console.log(`❌ [${processed}/${sites.length}] Failed: ${site.display_name || site.rkey}`); 110 + } 111 + }) 112 + ); 113 + } 114 + 115 + stats.duration = Date.now() - startTime; 116 + 117 + console.log(` 118 + ╔══════════════════════════════════════════╗ 119 + ║ CACHE BACKFILL COMPLETED ║ 120 + ╚══════════════════════════════════════════╝ 121 + 122 + 📊 Total Sites: ${stats.total} 123 + ✅ Cached: ${stats.cached} 124 + ⏭️ Skipped: ${stats.skipped} 125 + ❌ Failed: ${stats.failed} 126 + ⏱️ Duration: ${(stats.duration / 1000).toFixed(2)}s 127 + `); 128 + 129 + logger.info('Cache backfill completed', stats); 130 + } catch (err) { 131 + logger.error('Cache backfill failed', err); 132 + console.error('❌ Cache backfill failed:', err); 133 + } 134 + 135 + return stats; 136 + }
+19
hosting-service/src/lib/db.ts
··· 81 81 } 82 82 } 83 83 84 + export interface SiteRecord { 85 + did: string; 86 + rkey: string; 87 + display_name?: string; 88 + } 89 + 90 + export async function getAllSites(): Promise<SiteRecord[]> { 91 + try { 92 + const result = await sql<SiteRecord[]>` 93 + SELECT did, rkey, display_name FROM sites 94 + ORDER BY created_at DESC 95 + `; 96 + return result; 97 + } catch (err) { 98 + console.error('Failed to get all sites', err); 99 + return []; 100 + } 101 + } 102 + 84 103 /** 85 104 * Generate a numeric lock ID from a string key 86 105 * PostgreSQL advisory locks use bigint (64-bit signed integer)