// Work-in-progress library that stores cold objects in S3, warm objects on disk,
// and hot objects in memory.
// Runtime: Node.js. Language: TypeScript.
/**
 * Per-entry bookkeeping record kept next to the stored bytes in a tier.
 *
 * @remarks
 * The fields exist to support:
 * - expiring entries (TTL management)
 * - access tracking for LRU/eviction policies
 * - integrity verification through a checksum
 * - content type information when serving over HTTP
 */
export interface StorageMetadata {
  /** Human-readable key the data was originally stored under. */
  key: string;

  /** Length of the data in bytes, measured uncompressed. */
  size: number;

  /** When the data was first created. */
  createdAt: Date;

  /** When the data was most recently accessed. */
  lastAccessed: Date;

  /** How many times the data has been accessed. */
  accessCount: number;

  /** Expiration instant, if any. The data is expired once the current time is later than this. */
  ttl?: Date;

  /** `true` when the data is compressed (e.g., with gzip). */
  compressed: boolean;

  /** SHA256 checksum of the data, used for integrity verification. */
  checksum: string;

  /** MIME type, when known (e.g., 'text/html', 'application/json'). */
  mimeType?: string;

  /** Encoding, when applicable (e.g., 'gzip', 'base64'). */
  encoding?: string;

  /** Free-form, user-defined string fields. */
  customMetadata?: Record<string, string>;
}
45
/**
 * Counters describing one storage tier.
 *
 * @remarks
 * Intended for monitoring cache performance and for capacity planning.
 */
export interface TierStats {
  /** Total number of bytes held by the tier. */
  bytes: number;

  /** Total number of items held by the tier. */
  items: number;

  /** Cache hit count; present only when the tier implements hit tracking. */
  hits?: number;

  /** Cache miss count; present only when the tier implements miss tracking. */
  misses?: number;

  /** Count of evictions caused by size/count limits; present only when the tier implements eviction. */
  evictions?: number;
}
68
/**
 * Aggregated statistics across all configured tiers.
 *
 * @remarks
 * Provides a complete view of cache performance across the entire storage hierarchy.
 */
export interface AllTierStats {
  /** Statistics for the hot tier (only present if a hot tier is configured) */
  hot?: TierStats;

  /** Statistics for the warm tier (only present if a warm tier is configured) */
  warm?: TierStats;

  /** Statistics for the cold tier (always present) */
  cold: TierStats;

  /** Total hits across all tiers */
  totalHits: number;

  /** Total misses across all tiers */
  totalMisses: number;

  /**
   * Hit rate expressed as a fraction on a 0-1 scale, not as a 0-100 percentage
   * (e.g., 0.25 means a quarter of lookups were hits).
   */
  hitRate: number;
}
94
/*
 * NOTE: this comment documents the `StorageTier` interface, which is declared
 * further down in this file (after the result and option types that follow).
 *
 * `StorageTier` is the interface that all storage tier implementations must satisfy.
 *
 * Remarks:
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * Example:
 *
 *   class RedisStorageTier implements StorageTier {
 *     constructor(private client: RedisClient) {}
 *
 *     async get(key: string): Promise<Uint8Array | null> {
 *       const buffer = await this.client.getBuffer(key);
 *       return buffer ? new Uint8Array(buffer) : null;
 *     }
 *
 *     // ... implement other methods
 *   }
 */
/**
 * What a tier returns when data and metadata are fetched in one combined operation.
 */
export interface TierGetResult {
  /** Bytes that were retrieved. */
  data: Uint8Array;

  /** Metadata belonging to those bytes. */
  metadata: StorageMetadata;
}
126
/**
 * What a tier returns from a streaming get.
 */
export interface TierStreamResult {
  /** Readable stream that yields the data. */
  stream: NodeJS.ReadableStream;

  /** Metadata belonging to the streamed data. */
  metadata: StorageMetadata;
}
136
/**
 * What TieredStorage returns from a streaming get.
 *
 * @remarks
 * Carries the originating tier so callers can observe where data came from.
 */
export interface StreamResult {
  /** Readable stream that yields the data. */
  stream: NodeJS.ReadableStream;

  /** Metadata belonging to the streamed data. */
  metadata: StorageMetadata;

  /** Tier that served the data. */
  source: 'hot' | 'warm' | 'cold';
}
151
/**
 * Options accepted by streaming set operations, in addition to the regular `SetOptions`.
 */
export interface StreamSetOptions extends SetOptions {
  /**
   * Number of bytes the stream will deliver.
   *
   * @remarks
   * Must be supplied up front for streaming writes, since the size is otherwise
   * unknown until the stream has been fully consumed. It feeds:
   * - metadata creation before streaming starts
   * - capacity checks and eviction in tiers with size limits
   */
  size: number;

  /**
   * Checksum of the data, computed ahead of time.
   *
   * @remarks
   * When omitted, the checksum is computed during streaming. Passing it is handy
   * when the value is already known (e.g., from a previous upload or an external source).
   */
  checksum?: string;

  /** MIME type of the content. */
  mimeType?: string;
}
182
/**
 * Interface that all storage tier implementations must satisfy.
 *
 * @remarks
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * Required members: `get`, `set`, `delete`, `exists`, `listKeys`, `deleteMany`,
 * `getMetadata`, `setMetadata`, `getStats`, and `clear`.
 * Optional members (a tier may leave them undefined): `getWithMetadata`,
 * `getStream`, and `setStream`.
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Retrieve data and metadata together in a single operation.
   *
   * @param key - The key to retrieve
   * @returns The data and metadata, or null if not found
   *
   * @remarks
   * Optional. This is more efficient than calling get() and getMetadata() separately,
   * especially for disk and network-based tiers.
   */
  getWithMetadata?(key: string): Promise<TierGetResult | null>;

  /**
   * Retrieve data as a readable stream with metadata.
   *
   * @param key - The key to retrieve
   * @returns A readable stream and metadata, or null if not found
   *
   * @remarks
   * Optional. Use this for large files to avoid loading entire content into memory.
   * The stream must be consumed or destroyed by the caller.
   */
  getStream?(key: string): Promise<TierStreamResult | null>;

  /**
   * Store data from a readable stream.
   *
   * @param key - The key to store under
   * @param stream - Readable stream of data to store
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * Optional. Use this for large files to avoid loading entire content into memory.
   * The stream will be fully consumed by this operation.
   */
  setStream?(
    key: string,
    stream: NodeJS.ReadableStream,
    metadata: StorageMetadata,
  ): Promise<void>;

  /**
   * Store data with associated metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * If the key already exists, it should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete data for a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check if a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List all keys in this tier, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * This should be memory-efficient and stream keys rather than loading all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete multiple keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * This is more efficient than calling delete() in a loop.
   * Implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve metadata for a key without fetching the data.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if not found
   *
   * @remarks
   * This is useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Update metadata for a key without modifying the data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Get statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Clear all data from this tier.
   *
   * @remarks
   * Use with caution! This will delete all data in the tier.
   */
  clear(): Promise<void>;
}
330
/**
 * Key-pattern rule that decides automatically which tiers a key is placed in.
 *
 * @remarks
 * Rules are checked in order and the first one that matches wins.
 * They let you declare tier placement once instead of passing
 * skipTiers on every set() call.
 *
 * @example
 * ```typescript
 * placementRules: [
 *   { pattern: 'index.html', tiers: ['hot', 'warm', 'cold'] },
 *   { pattern: '*.html', tiers: ['warm', 'cold'] },
 *   { pattern: 'assets/**', tiers: ['warm', 'cold'] },
 *   { pattern: '**', tiers: ['warm', 'cold'] }, // default
 * ]
 * ```
 */
export interface PlacementRule {
  /**
   * Glob pattern that keys are matched against.
   *
   * @remarks
   * Basic glob support:
   * - `*` stands for any characters except `/`
   * - `**` stands for any characters including `/`
   * - A literal such as `index.html` matches exactly
   */
  pattern: string;

  /**
   * Tiers that matching keys are written to.
   *
   * @remarks
   * Cold is always included (source of truth).
   * `['hot', 'warm', 'cold']` suits critical files.
   * `['warm', 'cold']` suits large files.
   * `['cold']` is for archival only.
   */
  tiers: ('hot' | 'warm' | 'cold')[];
}
372
/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 *
 * This interface is not generic; stored values pass through `serialization`
 * as `unknown`.
 */
export interface TieredStorageConfig {
  /** Storage tier configuration */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis) */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres) */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage) */
    cold: StorageTier;
  };

  /** Rules for automatic tier placement based on key patterns. First match wins. */
  placementRules?: PlacementRule[];

  /**
   * Whether to automatically compress data before storing.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent - data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata indicates compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data will expire after this duration. Can be overridden per-key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   */
  serialization?: {
    /** Convert data to Uint8Array for storage */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert Uint8Array back to original data */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}
453
/**
 * Options for setting data in the cache.
 *
 * @remarks
 * These options allow fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data will expire after this duration from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for storing application-specific information like content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Skip writing to specific tiers.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` to avoid filling memory
   * - Small critical files: omit `skipTiers` so the data is written to every tier
   *
   * Only `'hot'` and `'warm'` can be listed here.
   * Note: Cold tier can never be skipped (it's the source of truth).
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}
500
/**
 * Outcome of a read that returns the data together with its metadata.
 *
 * @typeParam T - The type of the retrieved data
 *
 * @remarks
 * Besides the data itself, it reports which tier served it.
 */
export interface StorageResult<T> {
  /** Data that was retrieved. */
  data: T;

  /** Metadata belonging to the data. */
  metadata: StorageMetadata;

  /** Tier that served the data. */
  source: 'hot' | 'warm' | 'cold';
}
519
/**
 * Outcome of writing data to the cache.
 *
 * @remarks
 * Reports which tiers successfully received the data.
 */
export interface SetResult {
  /** Key that was written. */
  key: string;

  /** Metadata stored together with the data. */
  metadata: StorageMetadata;

  /** Tiers that received the data. */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}
536
/**
 * Point-in-time picture of the whole storage state.
 *
 * @remarks
 * Meant for export/import, backup, and migration scenarios.
 * A snapshot carries metadata only; the actual data stays in the tiers.
 */
export interface StorageSnapshot {
  /** Version of the snapshot format (for compatibility). */
  version: number;

  /** Moment the snapshot was created. */
  exportedAt: Date;

  /** Every key present in the cold tier (the source of truth). */
  keys: string[];

  /** Metadata for each key. */
  metadata: Record<string, StorageMetadata>;

  /** Statistics as of the export. */
  stats: AllTierStats;
}
557 /** Statistics at time of export */
558 stats: AllTierStats;
559}