+3
-2
hosting-service/package.json
+3
-2
hosting-service/package.json
···
3
3
"version": "1.0.0",
4
4
"type": "module",
5
5
"scripts": {
6
-
"dev": "tsx watch src/index.ts",
6
+
"dev": "tsx --env-file=.env watch src/index.ts",
7
7
"build": "tsc",
8
-
"start": "tsx src/index.ts"
8
+
"start": "tsx --env-file=.env src/index.ts",
9
+
"backfill": "tsx --env-file=.env src/index.ts --backfill"
9
10
},
10
11
"dependencies": {
11
12
"@atproto/api": "^0.17.4",
+19
hosting-service/src/index.ts
+19
hosting-service/src/index.ts
···
3
3
import { FirehoseWorker } from './lib/firehose';
4
4
import { logger } from './lib/observability';
5
5
import { mkdirSync, existsSync } from 'fs';
6
+
import { backfillCache } from './lib/backfill';
6
7
7
8
const PORT = process.env.PORT ? parseInt(process.env.PORT) : 3001;
8
9
const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
10
+
11
+
// Parse CLI arguments
12
+
const args = process.argv.slice(2);
13
+
const hasBackfillFlag = args.includes('--backfill');
14
+
const backfillOnStartup = hasBackfillFlag || process.env.BACKFILL_ON_STARTUP === 'true';
9
15
10
16
// Ensure cache directory exists
11
17
if (!existsSync(CACHE_DIR)) {
···
19
25
});
20
26
21
27
firehose.start();
28
+
29
+
// Run backfill if requested
30
+
if (backfillOnStartup) {
31
+
console.log('🔄 Backfill requested, starting cache backfill...');
32
+
backfillCache({
33
+
skipExisting: true,
34
+
concurrency: 3,
35
+
}).then((stats) => {
36
+
console.log('✅ Cache backfill completed');
37
+
}).catch((err) => {
38
+
console.error('❌ Cache backfill error:', err);
39
+
});
40
+
}
22
41
23
42
// Add health check endpoint
24
43
app.get('/health', (c) => {
+136
hosting-service/src/lib/backfill.ts
+136
hosting-service/src/lib/backfill.ts
···
1
+
import { getAllSites } from './db';
2
+
import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils';
3
+
import { logger } from './observability';
4
+
5
+
export interface BackfillOptions {
6
+
skipExisting?: boolean; // Skip sites already in cache
7
+
concurrency?: number; // Number of sites to cache concurrently
8
+
maxSites?: number; // Maximum number of sites to backfill (for testing)
9
+
}
10
+
11
+
export interface BackfillStats {
12
+
total: number;
13
+
cached: number;
14
+
skipped: number;
15
+
failed: number;
16
+
duration: number;
17
+
}
18
+
19
+
/**
20
+
* Backfill all sites from the database into the local cache
21
+
*/
22
+
export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> {
23
+
const {
24
+
skipExisting = true,
25
+
concurrency = 3,
26
+
maxSites,
27
+
} = options;
28
+
29
+
const startTime = Date.now();
30
+
const stats: BackfillStats = {
31
+
total: 0,
32
+
cached: 0,
33
+
skipped: 0,
34
+
failed: 0,
35
+
duration: 0,
36
+
};
37
+
38
+
logger.info('Starting cache backfill', { skipExisting, concurrency, maxSites });
39
+
console.log(`
40
+
╔══════════════════════════════════════════╗
41
+
║ CACHE BACKFILL STARTING ║
42
+
╚══════════════════════════════════════════╝
43
+
`);
44
+
45
+
try {
46
+
// Get all sites from database
47
+
let sites = await getAllSites();
48
+
stats.total = sites.length;
49
+
50
+
logger.info(`Found ${sites.length} sites in database`);
51
+
console.log(`📊 Found ${sites.length} sites in database`);
52
+
53
+
// Limit if specified
54
+
if (maxSites && maxSites > 0) {
55
+
sites = sites.slice(0, maxSites);
56
+
console.log(`⚙️ Limited to ${maxSites} sites for backfill`);
57
+
}
58
+
59
+
// Process sites in batches
60
+
const batches: typeof sites[] = [];
61
+
for (let i = 0; i < sites.length; i += concurrency) {
62
+
batches.push(sites.slice(i, i + concurrency));
63
+
}
64
+
65
+
let processed = 0;
66
+
for (const batch of batches) {
67
+
await Promise.all(
68
+
batch.map(async (site) => {
69
+
try {
70
+
// Check if already cached
71
+
if (skipExisting && isCached(site.did, site.rkey)) {
72
+
stats.skipped++;
73
+
processed++;
74
+
logger.debug(`Skipping already cached site`, { did: site.did, rkey: site.rkey });
75
+
console.log(`⏭️ [${processed}/${sites.length}] Skipped (cached): ${site.display_name || site.rkey}`);
76
+
return;
77
+
}
78
+
79
+
// Fetch site record
80
+
const siteData = await fetchSiteRecord(site.did, site.rkey);
81
+
if (!siteData) {
82
+
stats.failed++;
83
+
processed++;
84
+
logger.error('Site record not found during backfill', null, { did: site.did, rkey: site.rkey });
85
+
console.log(`❌ [${processed}/${sites.length}] Failed (not found): ${site.display_name || site.rkey}`);
86
+
return;
87
+
}
88
+
89
+
// Get PDS endpoint
90
+
const pdsEndpoint = await getPdsForDid(site.did);
91
+
if (!pdsEndpoint) {
92
+
stats.failed++;
93
+
processed++;
94
+
logger.error('PDS not found during backfill', null, { did: site.did });
95
+
console.log(`❌ [${processed}/${sites.length}] Failed (no PDS): ${site.display_name || site.rkey}`);
96
+
return;
97
+
}
98
+
99
+
// Download and cache site
100
+
await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid);
101
+
stats.cached++;
102
+
processed++;
103
+
logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
104
+
console.log(`✅ [${processed}/${sites.length}] Cached: ${site.display_name || site.rkey}`);
105
+
} catch (err) {
106
+
stats.failed++;
107
+
processed++;
108
+
logger.error('Failed to cache site during backfill', err, { did: site.did, rkey: site.rkey });
109
+
console.log(`❌ [${processed}/${sites.length}] Failed: ${site.display_name || site.rkey}`);
110
+
}
111
+
})
112
+
);
113
+
}
114
+
115
+
stats.duration = Date.now() - startTime;
116
+
117
+
console.log(`
118
+
╔══════════════════════════════════════════╗
119
+
║ CACHE BACKFILL COMPLETED ║
120
+
╚══════════════════════════════════════════╝
121
+
122
+
📊 Total Sites: ${stats.total}
123
+
✅ Cached: ${stats.cached}
124
+
⏭️ Skipped: ${stats.skipped}
125
+
❌ Failed: ${stats.failed}
126
+
⏱️ Duration: ${(stats.duration / 1000).toFixed(2)}s
127
+
`);
128
+
129
+
logger.info('Cache backfill completed', stats);
130
+
} catch (err) {
131
+
logger.error('Cache backfill failed', err);
132
+
console.error('❌ Cache backfill failed:', err);
133
+
}
134
+
135
+
return stats;
136
+
}
+19
hosting-service/src/lib/db.ts
+19
hosting-service/src/lib/db.ts
···
81
81
}
82
82
}
83
83
84
+
export interface SiteRecord {
85
+
did: string;
86
+
rkey: string;
87
+
display_name?: string;
88
+
}
89
+
90
+
export async function getAllSites(): Promise<SiteRecord[]> {
91
+
try {
92
+
const result = await sql<SiteRecord[]>`
93
+
SELECT did, rkey, display_name FROM sites
94
+
ORDER BY created_at DESC
95
+
`;
96
+
return result;
97
+
} catch (err) {
98
+
console.error('Failed to get all sites', err);
99
+
return [];
100
+
}
101
+
}
102
+
84
103
/**
85
104
* Generate a numeric lock ID from a string key
86
105
* PostgreSQL advisory locks use bigint (64-bit signed integer)