WIP library that stores cold objects in S3, warm objects on disk, and hot objects in memory.
Node.js, TypeScript.
import type {
  TieredStorageConfig,
  SetOptions,
  StorageResult,
  SetResult,
  StorageMetadata,
  StorageTier,
  AllTierStats,
  StorageSnapshot,
} from './types/index';
import { compress, decompress } from './utils/compression.js';
import { defaultSerialize, defaultDeserialize } from './utils/serialization.js';
import { calculateChecksum } from './utils/checksum.js';
import { matchGlob } from './utils/glob.js';

/**
 * Main orchestrator for tiered storage system.
 *
 * @typeParam T - The type of data being stored
 *
 * @remarks
 * Implements a cascading containment model:
 * - **Write Strategy (Cascading Down):** Write to hot → also writes to warm and cold
 * - **Read Strategy (Bubbling Up):** Check hot first → if miss, check warm → if miss, check cold
 * - **Bootstrap Strategy:** Hot can bootstrap from warm, warm can bootstrap from cold
 *
 * The cold tier is the source of truth and is required.
 * Hot and warm tiers are optional performance optimizations.
 *
 * Placement rules (`placementRules`) are honoured on writes, on eager read
 * promotion, and when bootstrapping, so a key that a rule keeps out of a tier
 * is never copied into that tier as a side effect of reading it.
 *
 * @example
 * ```typescript
 * const storage = new TieredStorage({
 *   tiers: {
 *     hot: new MemoryStorageTier({ maxSizeBytes: 100 * 1024 * 1024 }), // 100MB
 *     warm: new DiskStorageTier({ directory: './cache' }),
 *     cold: new S3StorageTier({ bucket: 'my-bucket', region: 'us-east-1' }),
 *   },
 *   compression: true,
 *   defaultTTL: 14 * 24 * 60 * 60 * 1000, // 14 days
 *   promotionStrategy: 'lazy',
 * });
 *
 * // Store data (cascades to all tiers)
 * await storage.set('user:123', { name: 'Alice' });
 *
 * // Retrieve data (bubbles up from cold → warm → hot)
 * const user = await storage.get('user:123');
 *
 * // Invalidate all keys with prefix
 * await storage.invalidate('user:');
 * ```
 */
export class TieredStorage<T = unknown> {
  private readonly serialize: (data: unknown) => Promise<Uint8Array>;
  private readonly deserialize: (data: Uint8Array) => Promise<unknown>;

  /**
   * @param config - Tier instances plus optional serialization, compression,
   *   TTL, promotion and placement settings.
   * @throws Error if no cold tier is configured.
   */
  constructor(private readonly config: TieredStorageConfig) {
    if (!config.tiers.cold) {
      throw new Error('Cold tier is required');
    }

    this.serialize = config.serialization?.serialize ?? defaultSerialize;
    this.deserialize = config.serialization?.deserialize ?? defaultDeserialize;
  }

  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data, or null if not found or expired
   *
   * @remarks
   * Checks tiers in order: hot → warm → cold.
   * When `promotionStrategy` is `'eager'`, a hit in a lower tier is copied to the
   * upper tiers before returning; other strategies do not promote on read.
   * Automatically handles decompression and deserialization.
   * Returns null if key doesn't exist or has expired (TTL).
   */
  async get(key: string): Promise<T | null> {
    const result = await this.getWithMetadata(key);
    return result ? result.data : null;
  }

  /**
   * Retrieve data with metadata and source tier information.
   *
   * @param key - The key to retrieve
   * @returns The data, metadata, and source tier, or null if not found
   *
   * @remarks
   * Use this when you need to know:
   * - Which tier served the data (for observability)
   * - Metadata like access count, TTL, checksum
   * - When the data was created/last accessed
   *
   * An expired hit in any tier deletes the key from every tier and yields null.
   */
  async getWithMetadata(key: string): Promise<StorageResult<T> | null> {
    const { hot, warm, cold } = this.config.tiers;
    const eager = this.config.promotionStrategy === 'eager';

    // 1. Check hot tier first (nothing above it to promote to)
    if (hot) {
      const hit = await this.getFromTier(hot, key);
      if (hit) {
        return this.serveHit(key, hit, 'hot', []);
      }
    }

    // 2. Check warm tier
    if (warm) {
      const hit = await this.getFromTier(warm, key);
      if (hit) {
        const promoteTo = eager && hot && this.isTierAllowed(key, 'hot') ? [hot] : [];
        return this.serveHit(key, hit, 'warm', promoteTo);
      }
    }

    // 3. Check cold tier (source of truth)
    const hit = await this.getFromTier(cold, key);
    if (!hit) {
      return null;
    }

    const promoteTo: StorageTier[] = [];
    if (eager) {
      if (warm && this.isTierAllowed(key, 'warm')) promoteTo.push(warm);
      if (hot && this.isTierAllowed(key, 'hot')) promoteTo.push(hot);
    }
    return this.serveHit(key, hit, 'cold', promoteTo);
  }

  /**
   * Turn a raw tier hit into a caller-facing result.
   *
   * @param key - The key that was read
   * @param hit - Raw bytes and metadata returned by the tier
   * @param source - Name of the tier that produced the hit
   * @param promoteTo - Upper tiers the entry should be copied into before returning
   * @returns The deserialized result, or null if the entry had expired
   *
   * @remarks
   * Expired entries are deleted from all tiers. Promotion writes are awaited so
   * they are guaranteed to have completed when the read resolves; the access-stats
   * update is best-effort and not awaited.
   */
  private async serveHit(
    key: string,
    hit: { data: Uint8Array; metadata: StorageMetadata },
    source: 'hot' | 'warm' | 'cold',
    promoteTo: StorageTier[],
  ): Promise<StorageResult<T> | null> {
    if (this.isExpired(hit.metadata)) {
      await this.delete(key);
      return null;
    }

    await Promise.all(promoteTo.map((tier) => tier.set(key, hit.data, hit.metadata)));

    this.recordAccess(key, source);

    return {
      data: (await this.deserializeData(hit.data, hit.metadata)) as T,
      metadata: hit.metadata,
      source,
    };
  }

  /**
   * Get data and metadata from a tier using the most efficient method.
   *
   * @remarks
   * Uses the tier's getWithMetadata if available, otherwise falls back
   * to separate get() and getMetadata() calls. Returns null when either the
   * data or the metadata is missing.
   */
  private async getFromTier(
    tier: StorageTier,
    key: string,
  ): Promise<{ data: Uint8Array; metadata: StorageMetadata } | null> {
    // Use optimized combined method if available
    if (tier.getWithMetadata) {
      return tier.getWithMetadata(key);
    }

    // Fallback: separate calls (metadata is only fetched when data exists)
    const data = await tier.get(key);
    if (!data) {
      return null;
    }
    const metadata = await tier.getMetadata(key);
    if (!metadata) {
      return null;
    }
    return { data, metadata };
  }

  /**
   * Store data with optional configuration.
   *
   * @param key - The key to store under
   * @param data - The data to store
   * @param options - Optional configuration (TTL, metadata, tier skipping)
   * @returns Information about what was stored and where
   *
   * @remarks
   * Data cascades down through tiers:
   * - If written to hot, also written to warm and cold
   * - If written to warm (hot skipped), also written to cold
   * - Cold is always written (source of truth)
   *
   * Use `skipTiers` to control placement. For example:
   * - Large files: `skipTiers: ['hot']` to avoid memory bloat
   * - Critical small files: Write to all tiers for fastest access
   *
   * Tiers are written sequentially (hot, then warm, then cold). If a write
   * rejects, the error propagates and tiers written before it keep the value.
   *
   * Automatically handles serialization and optional compression.
   */
  async set(key: string, data: T, options?: SetOptions): Promise<SetResult> {
    const { hot, warm, cold } = this.config.tiers;

    // 1. Serialize data
    const serialized = await this.serialize(data);

    // 2. Optionally compress
    const finalData = this.config.compression ? await compress(serialized) : serialized;

    // 3. Create metadata (size and checksum describe the bytes actually stored)
    const metadata = this.createMetadata(key, finalData, options);

    // 4. Determine which tiers to write to
    const allowedTiers = this.getTiersForKey(key, options?.skipTiers);

    // 5. Write to tiers
    const tiersWritten: ('hot' | 'warm' | 'cold')[] = [];

    if (hot && allowedTiers.includes('hot')) {
      await hot.set(key, finalData, metadata);
      tiersWritten.push('hot');
    }

    if (warm && allowedTiers.includes('warm')) {
      await warm.set(key, finalData, metadata);
      tiersWritten.push('warm');
    }

    // Always write to cold (source of truth)
    await cold.set(key, finalData, metadata);
    tiersWritten.push('cold');

    return { key, metadata, tiersWritten };
  }

  /**
   * Determine which tiers a key should be written to.
   *
   * @param key - The key being stored
   * @param skipTiers - Explicit tiers to skip (overrides placement rules)
   * @returns Array of tiers to write to
   *
   * @remarks
   * Priority: skipTiers option > placementRules > all configured tiers.
   * The first placement rule whose glob matches the key wins; `'cold'` is
   * appended if the rule omits it.
   */
  private getTiersForKey(
    key: string,
    skipTiers?: ('hot' | 'warm')[],
  ): ('hot' | 'warm' | 'cold')[] {
    // If explicit skipTiers provided, use that
    if (skipTiers && skipTiers.length > 0) {
      const allTiers: ('hot' | 'warm' | 'cold')[] = ['hot', 'warm', 'cold'];
      return allTiers.filter((t) => !skipTiers.includes(t as 'hot' | 'warm'));
    }

    // Check placement rules
    if (this.config.placementRules) {
      for (const rule of this.config.placementRules) {
        if (matchGlob(rule.pattern, key)) {
          // Ensure cold is always included
          if (!rule.tiers.includes('cold')) {
            return [...rule.tiers, 'cold'];
          }
          return rule.tiers;
        }
      }
    }

    // Default: write to all configured tiers
    return ['hot', 'warm', 'cold'];
  }

  /**
   * Whether placement rules permit `key` to live in `tier`.
   *
   * @remarks
   * Used for promotion and bootstrap, where there is no per-call `skipTiers`.
   */
  private isTierAllowed(key: string, tier: 'hot' | 'warm' | 'cold'): boolean {
    return this.getTiersForKey(key).includes(tier);
  }

  /**
   * Delete data from all tiers.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Deletes from all configured tiers in parallel.
   */
  async delete(key: string): Promise<void> {
    await Promise.all([
      this.config.tiers.hot?.delete(key),
      this.config.tiers.warm?.delete(key),
      this.config.tiers.cold.delete(key),
    ]);
  }

  /**
   * Check if a key exists in any tier.
   *
   * @param key - The key to check
   * @returns true if the key exists and hasn't expired
   *
   * @remarks
   * Checks tiers in order: hot → warm → cold.
   * A tier holding an expired (or metadata-less) copy does not count; the
   * search continues with the next tier.
   */
  async exists(key: string): Promise<boolean> {
    const { hot, warm, cold } = this.config.tiers;

    // Fastest tier first; cold (source of truth) last
    for (const tier of [hot, warm, cold]) {
      if (!tier) continue;
      if (!(await tier.exists(key))) continue;

      const metadata = await tier.getMetadata(key);
      if (metadata && !this.isExpired(metadata)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Renew TTL for a key.
   *
   * @param key - The key to touch
   * @param ttlMs - Optional new TTL in milliseconds (uses default if not provided)
   *
   * @remarks
   * Updates the TTL and lastAccessed timestamp in every tier that has metadata
   * for the key. Useful for implementing "keep alive" behavior for actively
   * used keys. Does nothing if no TTL is configured (or the TTL is 0).
   */
  async touch(key: string, ttlMs?: number): Promise<void> {
    const ttl = ttlMs ?? this.config.defaultTTL;
    if (!ttl) return;

    const newTTL = new Date(Date.now() + ttl);

    for (const tier of [
      this.config.tiers.hot,
      this.config.tiers.warm,
      this.config.tiers.cold,
    ]) {
      if (!tier) continue;

      const metadata = await tier.getMetadata(key);
      if (metadata) {
        metadata.ttl = newTTL;
        metadata.lastAccessed = new Date();
        await tier.setMetadata(key, metadata);
      }
    }
  }

  /**
   * Invalidate all keys matching a prefix.
   *
   * @param prefix - The prefix to match (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns Number of distinct keys deleted
   *
   * @remarks
   * Useful for bulk invalidation:
   * - Site invalidation: `invalidate('site:abc:')`
   * - User invalidation: `invalidate('user:123:')`
   * - Global invalidation: `invalidate('')` (deletes everything)
   *
   * Keys are collected from every tier (so stale upper-tier entries are removed
   * too) and then deleted from all tiers in parallel.
   */
  async invalidate(prefix: string): Promise<number> {
    const { hot, warm, cold } = this.config.tiers;
    const keysToDelete = new Set<string>();

    // Collect all keys matching prefix from all tiers
    for (const tier of [hot, warm, cold]) {
      if (!tier) continue;
      for await (const key of tier.listKeys(prefix)) {
        keysToDelete.add(key);
      }
    }

    // Nothing matched: skip the (possibly remote) bulk-delete round trips
    if (keysToDelete.size === 0) {
      return 0;
    }

    // Delete from all tiers in parallel
    const keys = Array.from(keysToDelete);

    await Promise.all([hot?.deleteMany(keys), warm?.deleteMany(keys), cold.deleteMany(keys)]);

    return keys.length;
  }

  /**
   * List all keys, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys
   * @returns Async iterator of keys
   *
   * @remarks
   * Returns keys from the cold tier (source of truth).
   * Memory-efficient - streams keys rather than loading all into memory.
   *
   * @example
   * ```typescript
   * for await (const key of storage.listKeys('user:')) {
   *   console.log(key);
   * }
   * ```
   */
  async *listKeys(prefix?: string): AsyncIterableIterator<string> {
    // List from cold tier (source of truth)
    for await (const key of this.config.tiers.cold.listKeys(prefix)) {
      yield key;
    }
  }

  /**
   * Get aggregated statistics across all tiers.
   *
   * @returns Statistics including size, item count, hits, misses, hit rate
   *
   * @remarks
   * Useful for monitoring and capacity planning.
   * Hit rate is calculated as: hits / (hits + misses), or 0 when there has
   * been no traffic.
   */
  async getStats(): Promise<AllTierStats> {
    const [hot, warm, cold] = await Promise.all([
      this.config.tiers.hot?.getStats(),
      this.config.tiers.warm?.getStats(),
      this.config.tiers.cold.getStats(),
    ]);

    const totalHits = (hot?.hits ?? 0) + (warm?.hits ?? 0) + (cold?.hits ?? 0);
    const totalMisses = (hot?.misses ?? 0) + (warm?.misses ?? 0) + (cold?.misses ?? 0);
    const totalLookups = totalHits + totalMisses;
    const hitRate = totalLookups > 0 ? totalHits / totalLookups : 0;

    return {
      ...(hot && { hot }),
      ...(warm && { warm }),
      cold,
      totalHits,
      totalMisses,
      hitRate,
    };
  }

  /**
   * Clear all data from all tiers.
   *
   * @remarks
   * Use with extreme caution! This will delete all data in the entire storage system.
   * Cannot be undone.
   */
  async clear(): Promise<void> {
    await Promise.all([
      this.config.tiers.hot?.clear(),
      this.config.tiers.warm?.clear(),
      this.config.tiers.cold.clear(),
    ]);
  }

  /**
   * Clear a specific tier.
   *
   * @param tier - Which tier to clear
   *
   * @remarks
   * Useful for:
   * - Clearing hot tier to test warm/cold performance
   * - Clearing warm tier to force rebuilding from cold
   * - Clearing cold tier to start fresh (⚠️ loses source of truth!)
   *
   * Clearing an unconfigured hot or warm tier is a no-op.
   */
  async clearTier(tier: 'hot' | 'warm' | 'cold'): Promise<void> {
    switch (tier) {
      case 'hot':
        await this.config.tiers.hot?.clear();
        break;
      case 'warm':
        await this.config.tiers.warm?.clear();
        break;
      case 'cold':
        await this.config.tiers.cold.clear();
        break;
    }
  }

  /**
   * Export metadata snapshot for backup or migration.
   *
   * @returns Snapshot containing all keys, metadata, and statistics
   *
   * @remarks
   * The snapshot includes metadata but not the actual data (data remains in tiers).
   * Keys and metadata are read from the cold tier only.
   * Useful for:
   * - Backup and restore
   * - Migration between storage systems
   * - Auditing and compliance
   */
  async export(): Promise<StorageSnapshot> {
    const { cold } = this.config.tiers;
    const keys: string[] = [];
    const metadata: Record<string, StorageMetadata> = {};

    // Export from cold tier (source of truth)
    for await (const key of cold.listKeys()) {
      keys.push(key);
      const meta = await cold.getMetadata(key);
      if (meta) {
        metadata[key] = meta;
      }
    }

    const stats = await this.getStats();

    return {
      version: 1,
      exportedAt: new Date(),
      keys,
      metadata,
      stats,
    };
  }

  /**
   * Import metadata snapshot.
   *
   * @param snapshot - Snapshot to import
   * @throws Error if the snapshot version is not 1.
   *
   * @remarks
   * Validates version compatibility before importing.
   * Only imports metadata - assumes data already exists in cold tier.
   * Keys listed in the snapshot without a metadata entry are skipped.
   */
  async import(snapshot: StorageSnapshot): Promise<void> {
    if (snapshot.version !== 1) {
      throw new Error(`Unsupported snapshot version: ${snapshot.version}`);
    }

    const { hot, warm, cold } = this.config.tiers;

    // Import metadata into all configured tiers
    for (const key of snapshot.keys) {
      const metadata = snapshot.metadata[key];
      if (!metadata) continue;

      if (hot) {
        await hot.setMetadata(key, metadata);
      }

      if (warm) {
        await warm.setMetadata(key, metadata);
      }

      await cold.setMetadata(key, metadata);
    }
  }

  /**
   * Bootstrap hot tier from warm tier.
   *
   * @param limit - Optional limit on number of items to load; `0` (or a
   *   negative value) loads nothing, `undefined` loads every candidate
   * @returns Number of items loaded
   *
   * @remarks
   * Loads the most frequently accessed items from warm into hot.
   * Useful for warming up the cache after a restart.
   * Items are sorted by: accessCount * lastAccessed timestamp (higher is better),
   * with ties (e.g. entries that were never read, whose score is 0) broken by
   * most recent lastAccessed.
   * Expired entries and keys whose placement rule excludes the hot tier are skipped.
   */
  async bootstrapHot(limit?: number): Promise<number> {
    const { hot, warm } = this.config.tiers;
    if (!hot || !warm) {
      return 0;
    }
    // An explicit limit of 0 means "load nothing", not "no limit"
    if (limit !== undefined && limit <= 0) {
      return 0;
    }

    // Load metadata for all eligible keys
    const candidates: Array<[string, StorageMetadata]> = [];
    for await (const key of warm.listKeys()) {
      if (!this.isTierAllowed(key, 'hot')) continue;

      const metadata = await warm.getMetadata(key);
      if (metadata && !this.isExpired(metadata)) {
        candidates.push([key, metadata]);
      }
    }

    // Sort by access count * recency (simple scoring), then by recency alone
    candidates.sort(([, a], [, b]) => {
      const recencyA = a.lastAccessed.getTime();
      const recencyB = b.lastAccessed.getTime();
      return b.accessCount * recencyB - a.accessCount * recencyA || recencyB - recencyA;
    });

    // Load top N keys into hot tier
    const selected = limit === undefined ? candidates : candidates.slice(0, limit);

    let loaded = 0;
    for (const [key, metadata] of selected) {
      const data = await warm.get(key);
      if (data) {
        await hot.set(key, data, metadata);
        loaded++;
      }
    }

    return loaded;
  }

  /**
   * Bootstrap warm tier from cold tier.
   *
   * @param options - Optional limit and date filter. `limit: 0` (or negative)
   *   loads nothing; omitting `limit` loads every eligible item. Items whose
   *   lastAccessed is earlier than `sinceDate` are skipped.
   * @returns Number of items loaded
   *
   * @remarks
   * Loads recent items from cold into warm, in the order the cold tier lists them.
   * Expired entries and keys whose placement rule excludes the warm tier are skipped.
   * Useful for:
   * - Initial cache population
   * - Recovering from warm tier failure
   * - Migrating to a new warm tier implementation
   */
  async bootstrapWarm(options?: { limit?: number; sinceDate?: Date }): Promise<number> {
    const { warm, cold } = this.config.tiers;
    if (!warm) {
      return 0;
    }

    const limit = options?.limit;
    // An explicit limit of 0 means "load nothing", not "no limit"
    if (limit !== undefined && limit <= 0) {
      return 0;
    }
    const sinceDate = options?.sinceDate;

    let loaded = 0;

    for await (const key of cold.listKeys()) {
      if (!this.isTierAllowed(key, 'warm')) continue;

      const metadata = await cold.getMetadata(key);
      if (!metadata || this.isExpired(metadata)) continue;

      // Skip if too old
      if (sinceDate && metadata.lastAccessed < sinceDate) {
        continue;
      }

      const data = await cold.get(key);
      if (!data) continue;

      await warm.set(key, data, metadata);
      loaded++;

      if (limit !== undefined && loaded >= limit) {
        break;
      }
    }

    return loaded;
  }

  /**
   * Check if data has expired based on TTL.
   *
   * @returns false when the metadata carries no TTL.
   */
  private isExpired(metadata: StorageMetadata): boolean {
    if (!metadata.ttl) return false;
    return Date.now() > metadata.ttl.getTime();
  }

  /**
   * Kick off a best-effort access-stats update without awaiting it.
   *
   * @remarks
   * Stats are non-critical, so the read path does not wait for them. The
   * rejection is handled here because an unhandled promise rejection would
   * otherwise escape to the process level.
   */
  private recordAccess(key: string, tier: 'hot' | 'warm' | 'cold'): void {
    void this.updateAccessStats(key, tier).catch(() => {
      // Intentionally ignored: failing to bump lastAccessed/accessCount must
      // never fail (or crash) a read that already succeeded.
    });
  }

  /**
   * Update access statistics for a key.
   *
   * @remarks
   * Bumps lastAccessed and accessCount in the named tier only.
   */
  private async updateAccessStats(key: string, tier: 'hot' | 'warm' | 'cold'): Promise<void> {
    const tierObj =
      tier === 'hot'
        ? this.config.tiers.hot
        : tier === 'warm'
          ? this.config.tiers.warm
          : this.config.tiers.cold;

    if (!tierObj) return;

    const metadata = await tierObj.getMetadata(key);
    if (metadata) {
      metadata.lastAccessed = new Date();
      metadata.accessCount++;
      await tierObj.setMetadata(key, metadata);
    }
  }

  /**
   * Create metadata for new data.
   *
   * @param key - The key being stored
   * @param data - The bytes that will be written (after optional compression)
   * @param options - Per-call TTL and custom metadata
   *
   * @remarks
   * A TTL of 0 (or none) produces metadata without an expiry.
   */
  private createMetadata(key: string, data: Uint8Array, options?: SetOptions): StorageMetadata {
    const now = new Date();
    const ttl = options?.ttl ?? this.config.defaultTTL;

    const metadata: StorageMetadata = {
      key,
      size: data.byteLength,
      createdAt: now,
      lastAccessed: now,
      accessCount: 0,
      compressed: this.config.compression ?? false,
      checksum: calculateChecksum(data),
    };

    if (ttl) {
      metadata.ttl = new Date(now.getTime() + ttl);
    }

    if (options?.metadata) {
      metadata.customMetadata = options.metadata;
    }

    return metadata;
  }

  /**
   * Deserialize data, handling compression automatically.
   *
   * @param data - Raw bytes as stored in a tier
   * @param metadata - Metadata stored alongside the bytes, if available
   *
   * @remarks
   * Whether to decompress is decided by the `compressed` flag recorded when the
   * object was written, not by the current `compression` setting, so objects
   * stay readable after the setting is toggled. When the flag is unavailable
   * the current setting is used instead. In both cases the gzip magic bytes
   * (0x1f 0x8b) must also be present, which guards against inflating a payload
   * that is not actually gzip.
   */
  private async deserializeData(data: Uint8Array, metadata?: StorageMetadata): Promise<unknown> {
    const looksGzipped = data[0] === 0x1f && data[1] === 0x8b;
    const flaggedCompressed = metadata?.compressed ?? Boolean(this.config.compression);

    const payload = flaggedCompressed && looksGzipped ? await decompress(data) : data;

    return this.deserialize(payload);
  }
}