+120
-23
hosting-service/src/lib/utils.ts
+120
-23
hosting-service/src/lib/utils.ts
···
13
13
cachedAt: number;
14
14
did: string;
15
15
rkey: string;
16
+
// Map of file path to blob CID for incremental updates
17
+
fileCids?: Record<string, string>;
16
18
}
17
19
18
20
/**
···
200
202
throw new Error('Invalid record structure: root missing entries array');
201
203
}
202
204
205
+
// Get existing cache metadata to check for incremental updates
206
+
const existingMetadata = await getCacheMetadata(did, rkey);
207
+
const existingFileCids = existingMetadata?.fileCids || {};
208
+
203
209
// Use a temporary directory with timestamp to avoid collisions
204
210
const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
205
211
const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
206
212
const finalDir = `${CACHE_DIR}/${did}/${rkey}`;
207
213
208
214
try {
209
-
// Download to temporary directory
210
-
await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix);
211
-
await saveCacheMetadata(did, rkey, recordCid, tempSuffix);
215
+
// Collect file CIDs from the new record
216
+
const newFileCids: Record<string, string> = {};
217
+
collectFileCidsFromEntries(record.root.entries, '', newFileCids);
218
+
219
+
// Download/copy files to temporary directory (with incremental logic)
220
+
await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
221
+
await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);
212
222
213
223
// Atomically replace old cache with new cache
214
224
// On POSIX systems (Linux/macOS), rename is atomic
···
245
255
}
246
256
}
247
257
258
+
/**
 * Walk a tree of entries and record the blob CID of every file it contains.
 *
 * Directory nodes are descended into recursively; file nodes that carry a
 * blob have their CID extracted via `extractBlobCid`. Results are written
 * into `fileCids`, keyed by the slash-joined path built from `pathPrefix`
 * and the entry names. Files for which no CID can be extracted are skipped.
 *
 * @param entries - Entries at the current level of the tree
 * @param pathPrefix - Path of the enclosing directory ('' at the root)
 * @param fileCids - Output map of file path to blob CID; mutated in place
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const { name, node } of entries) {
    const fullPath = pathPrefix ? `${pathPrefix}/${name}` : name;

    // Directories contribute nothing themselves; recurse into their children.
    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, fullPath, fileCids);
      continue;
    }

    // Anything that is not a file node with a blob is ignored.
    if (!('type' in node && node.type === 'file' && 'blob' in node)) {
      continue;
    }

    const blobCid = extractBlobCid((node as File).blob);
    if (blobCid) {
      fileCids[fullPath] = blobCid;
    }
  }
}
277
+
248
278
async function cacheFiles(
249
279
did: string,
250
280
site: string,
251
281
entries: Entry[],
252
282
pdsEndpoint: string,
253
283
pathPrefix: string,
254
-
dirSuffix: string = ''
284
+
dirSuffix: string = '',
285
+
existingFileCids: Record<string, string> = {},
286
+
existingCacheDir?: string
255
287
): Promise<void> {
256
-
// Collect all file blob download tasks first
288
+
// Collect file tasks, separating unchanged files from new/changed files
257
289
const downloadTasks: Array<() => Promise<void>> = [];
258
-
290
+
const copyTasks: Array<() => Promise<void>> = [];
291
+
259
292
function collectFileTasks(
260
293
entries: Entry[],
261
294
currentPathPrefix: string
···
268
301
collectFileTasks(node.entries, currentPath);
269
302
} else if ('type' in node && node.type === 'file' && 'blob' in node) {
270
303
const fileNode = node as File;
271
-
downloadTasks.push(() => cacheFileBlob(
272
-
did,
273
-
site,
274
-
currentPath,
275
-
fileNode.blob,
276
-
pdsEndpoint,
277
-
fileNode.encoding,
278
-
fileNode.mimeType,
279
-
fileNode.base64,
280
-
dirSuffix
281
-
));
304
+
const cid = extractBlobCid(fileNode.blob);
305
+
306
+
// Check if file is unchanged (same CID as existing cache)
307
+
if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
308
+
// File unchanged - copy from existing cache instead of downloading
309
+
copyTasks.push(() => copyExistingFile(
310
+
did,
311
+
site,
312
+
currentPath,
313
+
dirSuffix,
314
+
existingCacheDir
315
+
));
316
+
} else {
317
+
// File new or changed - download it
318
+
downloadTasks.push(() => cacheFileBlob(
319
+
did,
320
+
site,
321
+
currentPath,
322
+
fileNode.blob,
323
+
pdsEndpoint,
324
+
fileNode.encoding,
325
+
fileNode.mimeType,
326
+
fileNode.base64,
327
+
dirSuffix
328
+
));
329
+
}
282
330
}
283
331
}
284
332
}
285
333
286
334
collectFileTasks(entries, pathPrefix);
287
335
288
-
// Execute downloads concurrently with a limit of 3 at a time
289
-
const concurrencyLimit = 3;
290
-
for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) {
291
-
const batch = downloadTasks.slice(i, i + concurrencyLimit);
336
+
console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);
337
+
338
+
// Copy unchanged files in parallel (fast local operations)
339
+
const copyLimit = 10;
340
+
for (let i = 0; i < copyTasks.length; i += copyLimit) {
341
+
const batch = copyTasks.slice(i, i + copyLimit);
342
+
await Promise.all(batch.map(task => task()));
343
+
}
344
+
345
+
// Download new/changed files concurrently with a limit of 3 at a time
346
+
const downloadLimit = 3;
347
+
for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
348
+
const batch = downloadTasks.slice(i, i + downloadLimit);
292
349
await Promise.all(batch.map(task => task()));
350
+
}
351
+
}
352
+
353
+
/**
 * Copy an unchanged file from the existing cache into the new cache location.
 *
 * The source is `${existingCacheDir}/${filePath}` and the destination is
 * `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`. A sibling `.meta`
 * file next to the source is copied as well when one exists.
 *
 * @param did - DID segment of the destination cache path
 * @param site - Site segment of the destination cache path
 * @param filePath - File path relative to the cache directory
 * @param dirSuffix - Suffix appended to the site directory name (e.g. a temp suffix)
 * @param existingCacheDir - Directory holding the previously cached files
 * @throws Re-throws any filesystem error other than a missing `.meta` file.
 *   This function does not fall back to downloading the file itself.
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile, mkdir } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  try {
    // Recursive mkdir succeeds when the directory already exists, so no
    // separate existence check is needed and concurrent copies cannot race.
    if (destDir) {
      await mkdir(destDir, { recursive: true });
    }

    await copyFile(sourceFile, destFile);

    // The metadata file is optional: attempt the copy and treat "source not
    // found" as "nothing to copy" instead of checking existence beforehand.
    try {
      await copyFile(`${sourceFile}.meta`, `${destFile}.meta`);
    } catch (metaErr: unknown) {
      const isMissing =
        typeof metaErr === 'object' &&
        metaErr !== null &&
        'code' in metaErr &&
        (metaErr as { code?: unknown }).code === 'ENOENT';
      if (!isMissing) {
        throw metaErr;
      }
    }

    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
  } catch (err) {
    // No download fallback happens here; the error propagates to the caller.
    console.error(`[Incremental] Failed to copy file ${filePath} from existing cache:`, err);
    throw err;
  }
}
295
391
···
404
500
return existsSync(`${CACHE_DIR}/${did}/${site}`);
405
501
}
406
502
407
-
async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> {
503
+
async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
408
504
const metadata: CacheMetadata = {
409
505
recordCid,
410
506
cachedAt: Date.now(),
411
507
did,
412
-
rkey
508
+
rkey,
509
+
fileCids
413
510
};
414
511
415
512
const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;