wip library to store cold objects in s3, warm objects on disk, and hot objects in memory
nodejs typescript
at main 15 kB view raw
/**
 * Metadata associated with stored data in a tier.
 *
 * @remarks
 * This metadata is stored alongside the actual data and is used for:
 * - TTL management and expiration
 * - Access tracking for LRU/eviction policies
 * - Data integrity verification via checksum
 * - Content type information for HTTP serving
 */
export interface StorageMetadata {
  /** Original key used to store the data (human-readable) */
  key: string;

  /** Size of the data in bytes (uncompressed size) */
  size: number;

  /** Timestamp when the data was first created */
  createdAt: Date;

  /** Timestamp when the data was last accessed */
  lastAccessed: Date;

  /** Number of times this data has been accessed */
  accessCount: number;

  /** Optional expiration timestamp. Data expires when current time > ttl */
  ttl?: Date;

  /** Whether the data is compressed (e.g., with gzip) */
  compressed: boolean;

  /** SHA256 checksum of the data for integrity verification */
  checksum: string;

  /** Optional MIME type (e.g., 'text/html', 'application/json') */
  mimeType?: string;

  /** Optional encoding (e.g., 'gzip', 'base64') */
  encoding?: string;

  /** User-defined metadata fields */
  customMetadata?: Record<string, string>;
}

/**
 * Statistics for a single storage tier.
 *
 * @remarks
 * Used for monitoring cache performance and capacity planning.
 */
export interface TierStats {
  /** Total bytes stored in this tier */
  bytes: number;

  /** Total number of items stored in this tier */
  items: number;

  /** Number of cache hits (only tracked if tier implements hit tracking) */
  hits?: number;

  /** Number of cache misses (only tracked if tier implements miss tracking) */
  misses?: number;

  /** Number of evictions due to size/count limits (only tracked if tier implements eviction) */
  evictions?: number;
}

/**
 * Aggregated statistics across all configured tiers.
 *
 * @remarks
 * Provides a complete view of cache performance across the entire storage hierarchy.
 */
export interface AllTierStats {
  /** Statistics for hot tier (if configured) */
  hot?: TierStats;

  /** Statistics for warm tier (if configured) */
  warm?: TierStats;

  /** Statistics for cold tier (always present) */
  cold: TierStats;

  /** Total hits across all tiers */
  totalHits: number;

  /** Total misses across all tiers */
  totalMisses: number;

  /** Hit rate as a ratio in the range 0-1 (e.g., 0.25 means a quarter of lookups were hits) */
  hitRate: number;
}

/**
 * Result from a combined get+metadata operation on a tier.
 */
export interface TierGetResult {
  /** The retrieved data */
  data: Uint8Array;
  /** Metadata associated with the data */
  metadata: StorageMetadata;
}

/**
 * Result from a streaming get operation on a tier.
 */
export interface TierStreamResult {
  /** Readable stream of the data */
  stream: NodeJS.ReadableStream;
  /** Metadata associated with the data */
  metadata: StorageMetadata;
}

/**
 * Result from a streaming get operation on TieredStorage.
 *
 * @remarks
 * Includes the source tier for observability.
 */
export interface StreamResult {
  /** Readable stream of the data */
  stream: NodeJS.ReadableStream;
  /** Metadata associated with the data */
  metadata: StorageMetadata;
  /** Which tier the data was served from */
  source: 'hot' | 'warm' | 'cold';
}

/**
 * Options for streaming set operations.
 */
export interface StreamSetOptions extends SetOptions {
  /**
   * Size of the data being streamed in bytes.
   *
   * @remarks
   * Required for streaming writes because the size cannot be determined
   * until the stream is fully consumed. This is used for:
   * - Metadata creation before streaming starts
   * - Capacity checks and eviction in tiers with size limits
   */
  size: number;

  /**
   * Pre-computed checksum of the data.
   *
   * @remarks
   * If not provided, checksum will be computed during streaming.
   * Providing it upfront is useful when the checksum is already known
   * (e.g., from a previous upload or external source).
   */
  checksum?: string;

  /**
   * MIME type of the content.
   */
  mimeType?: string;
}

/**
 * Interface that all storage tier implementations must satisfy.
 *
 * @remarks
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * The optional members (`getWithMetadata`, `getStream`, `setStream`) may be
 * omitted by a tier; callers must check for their presence before invoking them.
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Retrieve data and metadata together in a single operation.
   *
   * @param key - The key to retrieve
   * @returns The data and metadata, or null if not found
   *
   * @remarks
   * This is more efficient than calling get() and getMetadata() separately,
   * especially for disk and network-based tiers.
   */
  getWithMetadata?(key: string): Promise<TierGetResult | null>;

  /**
   * Retrieve data as a readable stream with metadata.
   *
   * @param key - The key to retrieve
   * @returns A readable stream and metadata, or null if not found
   *
   * @remarks
   * Use this for large files to avoid loading entire content into memory.
   * The stream must be consumed or destroyed by the caller.
   */
  getStream?(key: string): Promise<TierStreamResult | null>;

  /**
   * Store data from a readable stream.
   *
   * @param key - The key to store under
   * @param stream - Readable stream of data to store
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * Use this for large files to avoid loading entire content into memory.
   * The stream will be fully consumed by this operation.
   */
  setStream?(
    key: string,
    stream: NodeJS.ReadableStream,
    metadata: StorageMetadata,
  ): Promise<void>;

  /**
   * Store data with associated metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * If the key already exists, it should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete data for a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check if a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List all keys in this tier, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * This should be memory-efficient and stream keys rather than loading all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete multiple keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * This is more efficient than calling delete() in a loop.
   * Implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve metadata for a key without fetching the data.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if not found
   *
   * @remarks
   * This is useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Update metadata for a key without modifying the data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Get statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Clear all data from this tier.
   *
   * @remarks
   * Use with caution! This will delete all data in the tier.
   */
  clear(): Promise<void>;
}

/**
 * Rule for automatic tier placement based on key patterns.
 *
 * @remarks
 * Rules are evaluated in order. First matching rule wins.
 * Use this to define which keys go to which tiers without
 * specifying skipTiers on every set() call.
 *
 * @example
 * ```typescript
 * placementRules: [
 *   { pattern: 'index.html', tiers: ['hot', 'warm', 'cold'] },
 *   { pattern: '*.html', tiers: ['warm', 'cold'] },
 *   { pattern: 'assets/**', tiers: ['warm', 'cold'] },
 *   { pattern: '**', tiers: ['warm', 'cold'] }, // default
 * ]
 * ```
 */
export interface PlacementRule {
  /**
   * Glob pattern to match against keys.
   *
   * @remarks
   * Supports basic globs:
   * - `*` matches any characters except `/`
   * - `**` matches any characters including `/`
   * - Exact matches work too: `index.html`
   */
  pattern: string;

  /**
   * Which tiers to write to for matching keys.
   *
   * @remarks
   * Cold is always included (source of truth).
   * Use `['hot', 'warm', 'cold']` for critical files.
   * Use `['warm', 'cold']` for large files.
   * Use `['cold']` for archival only.
   */
  tiers: ('hot' | 'warm' | 'cold')[];
}

/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 */
export interface TieredStorageConfig {
  /** Storage tier configuration */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis) */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres) */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage) */
    cold: StorageTier;
  };

  /** Rules for automatic tier placement based on key patterns. First match wins. */
  placementRules?: PlacementRule[];

  /**
   * Whether to automatically compress data before storing.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent - data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata indicates compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data will expire after this duration. Can be overridden per-key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   */
  serialization?: {
    /** Convert data to Uint8Array for storage */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert Uint8Array back to original data */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}

/**
 * Options for setting data in the cache.
 *
 * @remarks
 * These options allow fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data will expire after this duration from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for storing application-specific information like content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Skip writing to specific tiers.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` to avoid filling memory
   * - Small critical files: omit `skipTiers` so the data is written to every tier
   *
   * Note: Cold tier can never be skipped (it's the source of truth), which is
   * why only 'hot' and 'warm' are accepted here.
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}

/**
 * Result from retrieving data with metadata.
 *
 * @typeParam T - The type of data being retrieved
 *
 * @remarks
 * Includes both the data and information about where it was served from.
 */
export interface StorageResult<T> {
  /** The retrieved data */
  data: T;

  /** Metadata associated with the data */
  metadata: StorageMetadata;

  /** Which tier the data was served from */
  source: 'hot' | 'warm' | 'cold';
}

/**
 * Result from setting data in the cache.
 *
 * @remarks
 * Indicates which tiers successfully received the data.
 */
export interface SetResult {
  /** The key that was set */
  key: string;

  /** Metadata that was stored with the data */
  metadata: StorageMetadata;

  /** Which tiers received the data */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}

/**
 * Snapshot of the entire storage state.
 *
 * @remarks
 * Used for export/import, backup, and migration scenarios.
 * The snapshot includes metadata but not the actual data (data remains in tiers).
 */
export interface StorageSnapshot {
  /** Snapshot format version (for compatibility) */
  version: number;

  /** When this snapshot was created */
  exportedAt: Date;

  /** All keys present in cold tier (source of truth) */
  keys: string[];

  /** Metadata for each key */
  metadata: Record<string, StorageMetadata>;

  /** Statistics at time of export */
  stats: AllTierStats;
}