Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

save CIDs to local metadata file, incrementally download new files, copy old

authored by nekomimi.pet and committed by nekomimi.pet 01f2c4bd 56a2017c
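
In short: the cache metadata now records a blob CID per file path, and the next refresh copies any path whose CID is unchanged from the existing cache instead of re-fetching it from the PDS. A minimal sketch of that decision, using made-up names (`CidMap`, `diffFileCids`); the real version is the copy/download split inside `cacheFiles` in the diff below:

```typescript
// Sketch only: split a new path -> CID map into copy vs. download lists
// by comparing against the map saved on the previous sync.
type CidMap = Record<string, string>;

function diffFileCids(oldCids: CidMap, newCids: CidMap): { copy: string[]; download: string[] } {
  const copy: string[] = [];
  const download: string[] = [];
  for (const [path, cid] of Object.entries(newCids)) {
    // Blob CIDs are content-addressed: an equal CID means byte-identical
    // content, so reusing the locally cached file is safe.
    if (oldCids[path] === cid) copy.push(path);
    else download.push(path);
  }
  return { copy, download };
}
```

Files deleted from the record simply never appear in the new map, so they are neither copied nor downloaded and drop out when the temp directory replaces the old cache.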

Changed files: +123 -24

hosting-service/src/lib/utils.ts (+120 -23)
···
   cachedAt: number;
   did: string;
   rkey: string;
+  // Map of file path to blob CID for incremental updates
+  fileCids?: Record<string, string>;
 }
 
 /**
···
     throw new Error('Invalid record structure: root missing entries array');
   }
 
+  // Get existing cache metadata to check for incremental updates
+  const existingMetadata = await getCacheMetadata(did, rkey);
+  const existingFileCids = existingMetadata?.fileCids || {};
+
   // Use a temporary directory with timestamp to avoid collisions
   const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
   const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
   const finalDir = `${CACHE_DIR}/${did}/${rkey}`;
 
   try {
-    // Download to temporary directory
-    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix);
-    await saveCacheMetadata(did, rkey, recordCid, tempSuffix);
+    // Collect file CIDs from the new record
+    const newFileCids: Record<string, string> = {};
+    collectFileCidsFromEntries(record.root.entries, '', newFileCids);
+
+    // Download/copy files to temporary directory (with incremental logic)
+    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
+    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);
 
     // Atomically replace old cache with new cache
     // On POSIX systems (Linux/macOS), rename is atomic
···
   }
 }
 
+/**
+ * Recursively collect file CIDs from entries for incremental update tracking
+ */
+function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
+  for (const entry of entries) {
+    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
+    const node = entry.node;
+
+    if ('type' in node && node.type === 'directory' && 'entries' in node) {
+      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
+    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
+      const fileNode = node as File;
+      const cid = extractBlobCid(fileNode.blob);
+      if (cid) {
+        fileCids[currentPath] = cid;
+      }
+    }
+  }
+}
+
 async function cacheFiles(
   did: string,
   site: string,
   entries: Entry[],
   pdsEndpoint: string,
   pathPrefix: string,
-  dirSuffix: string = ''
+  dirSuffix: string = '',
+  existingFileCids: Record<string, string> = {},
+  existingCacheDir?: string
 ): Promise<void> {
-  // Collect all file blob download tasks first
+  // Collect file tasks, separating unchanged files from new/changed files
   const downloadTasks: Array<() => Promise<void>> = [];
-
+  const copyTasks: Array<() => Promise<void>> = [];
+
   function collectFileTasks(
     entries: Entry[],
     currentPathPrefix: string
···
         collectFileTasks(node.entries, currentPath);
       } else if ('type' in node && node.type === 'file' && 'blob' in node) {
         const fileNode = node as File;
-        downloadTasks.push(() => cacheFileBlob(
-          did,
-          site,
-          currentPath,
-          fileNode.blob,
-          pdsEndpoint,
-          fileNode.encoding,
-          fileNode.mimeType,
-          fileNode.base64,
-          dirSuffix
-        ));
+        const cid = extractBlobCid(fileNode.blob);
+
+        // Check if file is unchanged (same CID as existing cache)
+        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
+          // File unchanged - copy from existing cache instead of downloading
+          copyTasks.push(() => copyExistingFile(
+            did,
+            site,
+            currentPath,
+            dirSuffix,
+            existingCacheDir
+          ));
+        } else {
+          // File new or changed - download it
+          downloadTasks.push(() => cacheFileBlob(
+            did,
+            site,
+            currentPath,
+            fileNode.blob,
+            pdsEndpoint,
+            fileNode.encoding,
+            fileNode.mimeType,
+            fileNode.base64,
+            dirSuffix
+          ));
+        }
       }
     }
   }
 
   collectFileTasks(entries, pathPrefix);
 
-  // Execute downloads concurrently with a limit of 3 at a time
-  const concurrencyLimit = 3;
-  for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) {
-    const batch = downloadTasks.slice(i, i + concurrencyLimit);
+  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);
+
+  // Copy unchanged files in parallel (fast local operations)
+  const copyLimit = 10;
+  for (let i = 0; i < copyTasks.length; i += copyLimit) {
+    const batch = copyTasks.slice(i, i + copyLimit);
+    await Promise.all(batch.map(task => task()));
+  }
+
+  // Download new/changed files concurrently with a limit of 3 at a time
+  const downloadLimit = 3;
+  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
+    const batch = downloadTasks.slice(i, i + downloadLimit);
     await Promise.all(batch.map(task => task()));
+  }
+}
+
+/**
+ * Copy an unchanged file from existing cache to new cache location
+ */
+async function copyExistingFile(
+  did: string,
+  site: string,
+  filePath: string,
+  dirSuffix: string,
+  existingCacheDir: string
+): Promise<void> {
+  const { copyFile } = await import('fs/promises');
+
+  const sourceFile = `${existingCacheDir}/${filePath}`;
+  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
+  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));
+
+  // Create destination directory if needed
+  if (destDir && !existsSync(destDir)) {
+    mkdirSync(destDir, { recursive: true });
+  }
+
+  try {
+    // Copy the file
+    await copyFile(sourceFile, destFile);
+
+    // Copy metadata file if it exists
+    const sourceMetaFile = `${sourceFile}.meta`;
+    const destMetaFile = `${destFile}.meta`;
+    if (existsSync(sourceMetaFile)) {
+      await copyFile(sourceMetaFile, destMetaFile);
+    }
+
+    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
+  } catch (err) {
+    console.error(`[Incremental] Failed to copy file ${filePath}, will attempt download:`, err);
+    throw err;
   }
 }
···
   return existsSync(`${CACHE_DIR}/${did}/${site}`);
 }
 
-async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> {
+async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
   const metadata: CacheMetadata = {
     recordCid,
     cachedAt: Date.now(),
     did,
-    rkey
+    rkey,
+    fileCids
   };
 
   const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
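
For illustration, here is what a `.metadata.json` written by the new `saveCacheMetadata` could look like. The interface matches the fields used in the diff above, but every value below is invented:

```typescript
// CacheMetadata shape from hosting-service/src/lib/utils.ts:
interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

// Hypothetical contents of `${CACHE_DIR}/${did}/${rkey}/.metadata.json`:
const example: CacheMetadata = {
  recordCid: 'bafyreihypotheticalrecordcid',
  cachedAt: 1730000000000,
  did: 'did:plc:example',
  rkey: 'my-site',
  fileCids: {
    'index.html': 'bafkreihypotheticalcid1',   // same CID next sync -> copied
    'css/style.css': 'bafkreihypotheticalcid2' // new CID next sync -> downloaded
  }
};
```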
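
Both task lists in `cacheFiles` run through the same fixed-size batching pattern (copies 10 at a time, downloads 3 at a time). The same idea as a standalone helper, under a hypothetical `runBatched` name:

```typescript
// Run tasks in batches of `limit`, awaiting each batch before the next.
// Simple and bounded, though one slow task stalls its whole batch; a
// sliding window would keep `limit` tasks in flight continuously.
async function runBatched(tasks: Array<() => Promise<void>>, limit: number): Promise<void> {
  for (let i = 0; i < tasks.length; i += limit) {
    const batch = tasks.slice(i, i + limit);
    await Promise.all(batch.map(task => task()));
  }
}
```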
hosting-service/tsconfig.json (+3 -1)
··· 24 24 25 25 /* Code doesn't run in DOM */ 26 26 "lib": ["es2022"], 27 - } 27 + }, 28 + "include": ["src/**/*"], 29 + "exclude": ["node_modules", "cache", "dist"] 28 30 }
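
The new `include`/`exclude` stops tsc from scanning everything under the package directory, in particular `cache` (presumably where `CACHE_DIR` points, so it can contain arbitrary files from hosted sites, including stray TypeScript) and `dist` build output. To confirm which files the compiler now picks up, standard tsc flags suffice:

```
npx tsc --noEmit --listFiles
```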