Monorepo for wisp.place — a static site hosting service built on top of the AT Protocol.

Save per-file blob CIDs to the local cache metadata file, incrementally download only new or changed files, and copy unchanged files over from the existing cache.

Changed files
+123 -24
hosting-service
+120 -23
hosting-service/src/lib/utils.ts
··· 13 13 cachedAt: number; 14 14 did: string; 15 15 rkey: string; 16 + // Map of file path to blob CID for incremental updates 17 + fileCids?: Record<string, string>; 16 18 } 17 19 18 20 /** ··· 200 202 throw new Error('Invalid record structure: root missing entries array'); 201 203 } 202 204 205 + // Get existing cache metadata to check for incremental updates 206 + const existingMetadata = await getCacheMetadata(did, rkey); 207 + const existingFileCids = existingMetadata?.fileCids || {}; 208 + 203 209 // Use a temporary directory with timestamp to avoid collisions 204 210 const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; 205 211 const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`; 206 212 const finalDir = `${CACHE_DIR}/${did}/${rkey}`; 207 213 208 214 try { 209 - // Download to temporary directory 210 - await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix); 211 - await saveCacheMetadata(did, rkey, recordCid, tempSuffix); 215 + // Collect file CIDs from the new record 216 + const newFileCids: Record<string, string> = {}; 217 + collectFileCidsFromEntries(record.root.entries, '', newFileCids); 218 + 219 + // Download/copy files to temporary directory (with incremental logic) 220 + await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir); 221 + await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids); 212 222 213 223 // Atomically replace old cache with new cache 214 224 // On POSIX systems (Linux/macOS), rename is atomic ··· 245 255 } 246 256 } 247 257 258 + /** 259 + * Recursively collect file CIDs from entries for incremental update tracking 260 + */ 261 + function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void { 262 + for (const entry of entries) { 263 + const currentPath = pathPrefix ? 
`${pathPrefix}/${entry.name}` : entry.name; 264 + const node = entry.node; 265 + 266 + if ('type' in node && node.type === 'directory' && 'entries' in node) { 267 + collectFileCidsFromEntries(node.entries, currentPath, fileCids); 268 + } else if ('type' in node && node.type === 'file' && 'blob' in node) { 269 + const fileNode = node as File; 270 + const cid = extractBlobCid(fileNode.blob); 271 + if (cid) { 272 + fileCids[currentPath] = cid; 273 + } 274 + } 275 + } 276 + } 277 + 248 278 async function cacheFiles( 249 279 did: string, 250 280 site: string, 251 281 entries: Entry[], 252 282 pdsEndpoint: string, 253 283 pathPrefix: string, 254 - dirSuffix: string = '' 284 + dirSuffix: string = '', 285 + existingFileCids: Record<string, string> = {}, 286 + existingCacheDir?: string 255 287 ): Promise<void> { 256 - // Collect all file blob download tasks first 288 + // Collect file tasks, separating unchanged files from new/changed files 257 289 const downloadTasks: Array<() => Promise<void>> = []; 258 - 290 + const copyTasks: Array<() => Promise<void>> = []; 291 + 259 292 function collectFileTasks( 260 293 entries: Entry[], 261 294 currentPathPrefix: string ··· 268 301 collectFileTasks(node.entries, currentPath); 269 302 } else if ('type' in node && node.type === 'file' && 'blob' in node) { 270 303 const fileNode = node as File; 271 - downloadTasks.push(() => cacheFileBlob( 272 - did, 273 - site, 274 - currentPath, 275 - fileNode.blob, 276 - pdsEndpoint, 277 - fileNode.encoding, 278 - fileNode.mimeType, 279 - fileNode.base64, 280 - dirSuffix 281 - )); 304 + const cid = extractBlobCid(fileNode.blob); 305 + 306 + // Check if file is unchanged (same CID as existing cache) 307 + if (cid && existingFileCids[currentPath] === cid && existingCacheDir) { 308 + // File unchanged - copy from existing cache instead of downloading 309 + copyTasks.push(() => copyExistingFile( 310 + did, 311 + site, 312 + currentPath, 313 + dirSuffix, 314 + existingCacheDir 315 + )); 316 + } else { 
317 + // File new or changed - download it 318 + downloadTasks.push(() => cacheFileBlob( 319 + did, 320 + site, 321 + currentPath, 322 + fileNode.blob, 323 + pdsEndpoint, 324 + fileNode.encoding, 325 + fileNode.mimeType, 326 + fileNode.base64, 327 + dirSuffix 328 + )); 329 + } 282 330 } 283 331 } 284 332 } 285 333 286 334 collectFileTasks(entries, pathPrefix); 287 335 288 - // Execute downloads concurrently with a limit of 3 at a time 289 - const concurrencyLimit = 3; 290 - for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) { 291 - const batch = downloadTasks.slice(i, i + concurrencyLimit); 336 + console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`); 337 + 338 + // Copy unchanged files in parallel (fast local operations) 339 + const copyLimit = 10; 340 + for (let i = 0; i < copyTasks.length; i += copyLimit) { 341 + const batch = copyTasks.slice(i, i + copyLimit); 342 + await Promise.all(batch.map(task => task())); 343 + } 344 + 345 + // Download new/changed files concurrently with a limit of 3 at a time 346 + const downloadLimit = 3; 347 + for (let i = 0; i < downloadTasks.length; i += downloadLimit) { 348 + const batch = downloadTasks.slice(i, i + downloadLimit); 292 349 await Promise.all(batch.map(task => task())); 350 + } 351 + } 352 + 353 + /** 354 + * Copy an unchanged file from existing cache to new cache location 355 + */ 356 + async function copyExistingFile( 357 + did: string, 358 + site: string, 359 + filePath: string, 360 + dirSuffix: string, 361 + existingCacheDir: string 362 + ): Promise<void> { 363 + const { copyFile } = await import('fs/promises'); 364 + 365 + const sourceFile = `${existingCacheDir}/${filePath}`; 366 + const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`; 367 + const destDir = destFile.substring(0, destFile.lastIndexOf('/')); 368 + 369 + // Create destination directory if needed 370 + if (destDir && !existsSync(destDir)) { 371 + 
mkdirSync(destDir, { recursive: true }); 372 + } 373 + 374 + try { 375 + // Copy the file 376 + await copyFile(sourceFile, destFile); 377 + 378 + // Copy metadata file if it exists 379 + const sourceMetaFile = `${sourceFile}.meta`; 380 + const destMetaFile = `${destFile}.meta`; 381 + if (existsSync(sourceMetaFile)) { 382 + await copyFile(sourceMetaFile, destMetaFile); 383 + } 384 + 385 + console.log(`[Incremental] Copied unchanged file: ${filePath}`); 386 + } catch (err) { 387 + console.error(`[Incremental] Failed to copy file ${filePath}, will attempt download:`, err); 388 + throw err; 293 389 } 294 390 } 295 391 ··· 404 500 return existsSync(`${CACHE_DIR}/${did}/${site}`); 405 501 } 406 502 407 - async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> { 503 + async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> { 408 504 const metadata: CacheMetadata = { 409 505 recordCid, 410 506 cachedAt: Date.now(), 411 507 did, 412 - rkey 508 + rkey, 509 + fileCids 413 510 }; 414 511 415 512 const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
+3 -1
hosting-service/tsconfig.json
··· 24 24 25 25 /* Code doesn't run in DOM */ 26 26 "lib": ["es2022"], 27 - } 27 + }, 28 + "include": ["src/**/*"], 29 + "exclude": ["node_modules", "cache", "dist"] 28 30 }