A community-based topic aggregation platform built on atproto
1package imageproxy
2
3import (
4 "context"
5 "errors"
6 "io/fs"
7 "log/slog"
8 "os"
9 "path/filepath"
10 "sort"
11 "strings"
12 "time"
13)
14
// Sentinel errors produced by the cache constructor and by the parameter
// checks that guard Get, Set, and Delete.
var (
	// ErrEmptyParameter reports that a required argument was the empty string.
	ErrEmptyParameter = errors.New("required parameter is empty")

	// ErrInvalidCacheBasePath reports that the cache base path was empty.
	ErrInvalidCacheBasePath = errors.New("cache base path cannot be empty")

	// ErrInvalidCacheMaxSize reports that maxSizeGB was zero or negative.
	ErrInvalidCacheMaxSize = errors.New("cache max size must be positive")
)
23
// Cache is the storage contract used by the image proxy. Every entry is
// addressed by the triple (preset, DID, CID).
type Cache interface {
	// Get looks up the image stored for preset, did, and cid.
	// It yields the bytes, a flag telling whether an entry was found, and any
	// error.
	Get(preset string, did string, cid string) ([]byte, bool, error)

	// Set saves data as the image for preset, did, and cid.
	Set(preset string, did string, cid string, data []byte) error

	// Delete drops the image stored for preset, did, and cid.
	Delete(preset string, did string, cid string) error

	// Cleanup performs TTL cleanup and LRU eviction in one call.
	// It yields how many entries were removed and any error.
	Cleanup() (int, error)
}
40
// DiskCache is a Cache that keeps every entry as a plain file.
//
// An entry lives at {basePath}/{preset}/{did_safe}/{cid}; did_safe is the DID
// with its colons turned into underscores so that it can be used as a
// directory name.
type DiskCache struct {
	basePath  string // root directory that holds all cached files
	maxSizeGB int    // size budget in GiB, enforced by LRU eviction
	ttlDays   int    // entry lifetime in days; 0 turns TTL cleanup off
}
49
50// NewDiskCache creates a new DiskCache with the specified base path, maximum size, and TTL.
51// Returns an error if basePath is empty or maxSizeGB is not positive.
52// ttlDays of 0 disables TTL-based cleanup (only LRU eviction applies).
53func NewDiskCache(basePath string, maxSizeGB int, ttlDays int) (*DiskCache, error) {
54 if basePath == "" {
55 return nil, ErrInvalidCacheBasePath
56 }
57 if maxSizeGB <= 0 {
58 return nil, ErrInvalidCacheMaxSize
59 }
60 if ttlDays < 0 {
61 return nil, errors.New("ttlDays cannot be negative")
62 }
63 return &DiskCache{
64 basePath: basePath,
65 maxSizeGB: maxSizeGB,
66 ttlDays: ttlDays,
67 }, nil
68}
69
// makeDIDSafe converts a DID to a filesystem-safe directory name.
//
// It sanitizes the input to prevent path traversal by:
//   - replacing colons with underscores,
//   - removing null bytes,
//   - removing path separators (/ and \),
//   - removing path traversal sequences (..).
//
// The ".." pass deliberately runs last. Removing a single character can join
// two dots that were previously separated (".\x00." becomes ".."), so running
// it earlier would let a traversal sequence survive sanitization.
//
// The result may be empty or a single "." for degenerate input such as "/" or
// "...".
func makeDIDSafe(did string) string {
	// Single-character rewrites first; none of them can create a character
	// that another one of these passes would need to handle.
	s := strings.ReplaceAll(did, ":", "_")
	s = strings.ReplaceAll(s, "\x00", "")
	s = strings.ReplaceAll(s, "/", "")
	s = strings.ReplaceAll(s, "\\", "")

	// With only dot runs left to worry about, one left-to-right pass reduces
	// every run to at most one dot, so no ".." can remain.
	return strings.ReplaceAll(s, "..", "")
}
92
// makeCIDSafe sanitizes a CID for use as a file name inside the cache.
//
// It removes null bytes, path separators (/ and \), and path traversal
// sequences (..). The ".." pass deliberately runs last: removing a single
// character can join two dots that were previously separated (".\x00."
// becomes ".."), so running it earlier would let a traversal sequence survive.
//
// The result may be empty or a single "." for degenerate input.
func makeCIDSafe(cid string) string {
	// Strip single characters first so that nothing removed later can glue
	// two dots together.
	s := strings.ReplaceAll(cid, "\x00", "")
	s = strings.ReplaceAll(s, "/", "")
	s = strings.ReplaceAll(s, "\\", "")

	// One left-to-right pass reduces every run of dots to at most one dot.
	return strings.ReplaceAll(s, "..", "")
}
108
// makePresetSafe sanitizes a preset name for use as a directory name inside
// the cache.
//
// It removes null bytes, path separators (/ and \), and path traversal
// sequences (..). The ".." pass deliberately runs last: removing a single
// character can join two dots that were previously separated (".\x00."
// becomes ".."), so running it earlier would let a traversal sequence survive.
//
// The result may be empty or a single "." for degenerate input.
func makePresetSafe(preset string) string {
	// Strip single characters first so that nothing removed later can glue
	// two dots together.
	s := strings.ReplaceAll(preset, "\x00", "")
	s = strings.ReplaceAll(s, "/", "")
	s = strings.ReplaceAll(s, "\\", "")

	// One left-to-right pass reduces every run of dots to at most one dot.
	return strings.ReplaceAll(s, "..", "")
}
123
124// cachePath constructs the full filesystem path for a cached item.
125// All components are sanitized to prevent path traversal attacks.
126func (c *DiskCache) cachePath(preset, did, cid string) string {
127 presetSafe := makePresetSafe(preset)
128 didSafe := makeDIDSafe(did)
129 cidSafe := makeCIDSafe(cid)
130 return filepath.Join(c.basePath, presetSafe, didSafe, cidSafe)
131}
132
133// validateParams checks that all required parameters are non-empty.
134func validateParams(preset, did, cid string) error {
135 if preset == "" || did == "" || cid == "" {
136 return ErrEmptyParameter
137 }
138 return nil
139}
140
141// Get retrieves cached image data for the given preset, DID, and CID.
142// Returns the data, whether it was found, and any error.
143// If the item is not in cache, returns (nil, false, nil).
144// Updates the file's modification time on access for LRU tracking.
145func (c *DiskCache) Get(preset, did, cid string) ([]byte, bool, error) {
146 if err := validateParams(preset, did, cid); err != nil {
147 return nil, false, err
148 }
149
150 path := c.cachePath(preset, did, cid)
151
152 data, err := os.ReadFile(path)
153 if err != nil {
154 if os.IsNotExist(err) {
155 return nil, false, nil
156 }
157 return nil, false, err
158 }
159
160 // Update mtime for LRU tracking
161 // Log errors as warnings since failed mtime updates degrade LRU accuracy
162 now := time.Now()
163 if chtimesErr := os.Chtimes(path, now, now); chtimesErr != nil {
164 slog.Warn("[IMAGE-PROXY] failed to update mtime for LRU tracking",
165 "path", path,
166 "error", chtimesErr,
167 )
168 }
169
170 return data, true, nil
171}
172
173// Set stores image data in the cache for the given preset, DID, and CID.
174// Creates necessary directories if they don't exist.
175func (c *DiskCache) Set(preset, did, cid string, data []byte) error {
176 if err := validateParams(preset, did, cid); err != nil {
177 return err
178 }
179
180 path := c.cachePath(preset, did, cid)
181 dir := filepath.Dir(path)
182
183 // Create directory structure if it doesn't exist
184 if err := os.MkdirAll(dir, 0755); err != nil {
185 return err
186 }
187
188 // Write the file atomically by writing to a temp file first
189 // then renaming (to avoid partial writes on crash)
190 tmpPath := path + ".tmp"
191 if err := os.WriteFile(tmpPath, data, 0644); err != nil {
192 return err
193 }
194
195 return os.Rename(tmpPath, path)
196}
197
198// Delete removes cached image data for the given preset, DID, and CID.
199// Returns nil if the item doesn't exist (idempotent delete).
200func (c *DiskCache) Delete(preset, did, cid string) error {
201 if err := validateParams(preset, did, cid); err != nil {
202 return err
203 }
204
205 path := c.cachePath(preset, did, cid)
206
207 err := os.Remove(path)
208 if err != nil && !os.IsNotExist(err) {
209 return err
210 }
211
212 return nil
213}
214
// cacheEntry is one file found by a cache scan, together with the metadata
// that eviction and TTL cleanup decide on.
type cacheEntry struct {
	path    string    // location of the file as reported by the directory walk
	size    int64     // file size in bytes
	modTime time.Time // last modification time, used as the LRU/TTL timestamp
}
221
222// scanCache walks the cache directory and returns all cache entries.
223func (c *DiskCache) scanCache() ([]cacheEntry, int64, error) {
224 var entries []cacheEntry
225 var totalSize int64
226
227 err := filepath.WalkDir(c.basePath, func(path string, d fs.DirEntry, err error) error {
228 if err != nil {
229 return err
230 }
231 if d.IsDir() {
232 return nil
233 }
234
235 info, err := d.Info()
236 if err != nil {
237 slog.Warn("[IMAGE-PROXY] failed to stat file during cache scan, cache size may be inaccurate",
238 "path", path,
239 "error", err,
240 )
241 return nil // Skip files we can't stat
242 }
243
244 entries = append(entries, cacheEntry{
245 path: path,
246 size: info.Size(),
247 modTime: info.ModTime(),
248 })
249 totalSize += info.Size()
250
251 return nil
252 })
253
254 if err != nil && !os.IsNotExist(err) {
255 return nil, 0, err
256 }
257
258 return entries, totalSize, nil
259}
260
261// GetCacheSize returns the current cache size in bytes.
262func (c *DiskCache) GetCacheSize() (int64, error) {
263 _, totalSize, err := c.scanCache()
264 return totalSize, err
265}
266
267// EvictLRU removes the least recently used entries until the cache is under the size limit.
268// Returns the number of entries removed.
269func (c *DiskCache) EvictLRU() (int, error) {
270 entries, totalSize, err := c.scanCache()
271 if err != nil {
272 return 0, err
273 }
274
275 maxSizeBytes := int64(c.maxSizeGB) * 1024 * 1024 * 1024
276 if totalSize <= maxSizeBytes {
277 return 0, nil // Under limit, nothing to do
278 }
279
280 // Sort by modification time (oldest first for LRU)
281 sort.Slice(entries, func(i, j int) bool {
282 return entries[i].modTime.Before(entries[j].modTime)
283 })
284
285 removed := 0
286 for _, entry := range entries {
287 if totalSize <= maxSizeBytes {
288 break
289 }
290
291 if err := os.Remove(entry.path); err != nil {
292 if !os.IsNotExist(err) {
293 slog.Warn("[IMAGE-PROXY] failed to remove cache entry during LRU eviction",
294 "path", entry.path,
295 "error", err,
296 )
297 }
298 continue
299 }
300
301 totalSize -= entry.size
302 removed++
303
304 slog.Debug("[IMAGE-PROXY] evicted cache entry (LRU)",
305 "path", entry.path,
306 "size_bytes", entry.size,
307 )
308 }
309
310 if removed > 0 {
311 slog.Info("[IMAGE-PROXY] LRU eviction completed",
312 "entries_removed", removed,
313 "new_size_bytes", totalSize,
314 "max_size_bytes", maxSizeBytes,
315 )
316 }
317
318 return removed, nil
319}
320
321// CleanExpired removes cache entries older than the configured TTL.
322// Returns the number of entries removed.
323// If TTL is 0 (disabled), returns 0 without scanning.
324func (c *DiskCache) CleanExpired() (int, error) {
325 if c.ttlDays <= 0 {
326 return 0, nil // TTL disabled
327 }
328
329 entries, _, err := c.scanCache()
330 if err != nil {
331 return 0, err
332 }
333
334 cutoff := time.Now().AddDate(0, 0, -c.ttlDays)
335 removed := 0
336
337 for _, entry := range entries {
338 if entry.modTime.After(cutoff) {
339 continue // Not expired
340 }
341
342 if err := os.Remove(entry.path); err != nil {
343 if !os.IsNotExist(err) {
344 slog.Warn("[IMAGE-PROXY] failed to remove expired cache entry",
345 "path", entry.path,
346 "mod_time", entry.modTime,
347 "error", err,
348 )
349 }
350 continue
351 }
352
353 removed++
354
355 slog.Debug("[IMAGE-PROXY] removed expired cache entry",
356 "path", entry.path,
357 "mod_time", entry.modTime,
358 "ttl_days", c.ttlDays,
359 )
360 }
361
362 if removed > 0 {
363 slog.Info("[IMAGE-PROXY] TTL cleanup completed",
364 "entries_removed", removed,
365 "ttl_days", c.ttlDays,
366 )
367 }
368
369 return removed, nil
370}
371
372// Cleanup runs both TTL cleanup and LRU eviction.
373// TTL cleanup runs first (removes definitely stale entries),
374// then LRU eviction runs if still over size limit.
375// Returns the total number of entries removed.
376func (c *DiskCache) Cleanup() (int, error) {
377 totalRemoved := 0
378
379 // First, remove expired entries (definitely stale)
380 ttlRemoved, err := c.CleanExpired()
381 if err != nil {
382 return 0, err
383 }
384 totalRemoved += ttlRemoved
385
386 // Then, run LRU eviction if still over limit
387 lruRemoved, err := c.EvictLRU()
388 if err != nil {
389 return totalRemoved, err
390 }
391 totalRemoved += lruRemoved
392
393 return totalRemoved, nil
394}
395
396// cleanEmptyDirs removes empty directories under the cache base path.
397// This is useful after eviction/cleanup to remove orphaned preset/DID directories.
398func (c *DiskCache) cleanEmptyDirs() error {
399 // Walk in reverse depth order to clean leaf directories first
400 var dirs []string
401
402 var walkErrors []error
403 err := filepath.WalkDir(c.basePath, func(path string, d fs.DirEntry, err error) error {
404 if err != nil {
405 // Log WalkDir errors but continue scanning to clean as much as possible
406 slog.Warn("[IMAGE-PROXY] error during empty dir cleanup scan",
407 "path", path,
408 "error", err,
409 )
410 walkErrors = append(walkErrors, err)
411 return nil // Continue scanning despite errors
412 }
413 if d.IsDir() && path != c.basePath {
414 dirs = append(dirs, path)
415 }
416 return nil
417 })
418
419 if err != nil {
420 return err
421 }
422
423 if len(walkErrors) > 0 {
424 slog.Warn("[IMAGE-PROXY] encountered errors during empty dir cleanup scan",
425 "error_count", len(walkErrors),
426 )
427 }
428
429 // Sort by length descending (deepest paths first)
430 sort.Slice(dirs, func(i, j int) bool {
431 return len(dirs[i]) > len(dirs[j])
432 })
433
434 var removeErrors int
435 for _, dir := range dirs {
436 entries, err := os.ReadDir(dir)
437 if err != nil {
438 slog.Warn("[IMAGE-PROXY] failed to read directory during cleanup",
439 "path", dir,
440 "error", err,
441 )
442 continue
443 }
444 if len(entries) == 0 {
445 if removeErr := os.Remove(dir); removeErr != nil {
446 slog.Warn("[IMAGE-PROXY] failed to remove empty directory",
447 "path", dir,
448 "error", removeErr,
449 )
450 removeErrors++
451 }
452 }
453 }
454
455 if removeErrors > 0 {
456 slog.Warn("[IMAGE-PROXY] some empty directories could not be removed",
457 "failed_count", removeErrors,
458 )
459 }
460
461 return nil
462}
463
464// StartCleanupJob starts a background goroutine that periodically runs cache cleanup.
465// Returns a cancel function that should be called during graceful shutdown.
466// If interval is 0 or negative, no cleanup job is started and the cancel function is a no-op.
467func (c *DiskCache) StartCleanupJob(interval time.Duration) context.CancelFunc {
468 if interval <= 0 {
469 slog.Info("[IMAGE-PROXY] cache cleanup job disabled (interval=0)")
470 return func() {} // No-op cancel
471 }
472
473 ctx, cancel := context.WithCancel(context.Background())
474
475 go func() {
476 defer func() {
477 if r := recover(); r != nil {
478 slog.Error("[IMAGE-PROXY] CRITICAL: cache cleanup job panicked",
479 "panic", r,
480 )
481 }
482 }()
483
484 ticker := time.NewTicker(interval)
485 defer ticker.Stop()
486
487 slog.Info("[IMAGE-PROXY] cache cleanup job started",
488 "interval", interval,
489 "ttl_days", c.ttlDays,
490 "max_size_gb", c.maxSizeGB,
491 )
492
493 cycleCount := 0
494 for {
495 select {
496 case <-ctx.Done():
497 slog.Info("[IMAGE-PROXY] cache cleanup job stopped")
498 return
499 case <-ticker.C:
500 cycleCount++
501
502 removed, err := c.Cleanup()
503 if err != nil {
504 slog.Error("[IMAGE-PROXY] cache cleanup error",
505 "error", err,
506 "cycle", cycleCount,
507 )
508 continue
509 }
510
511 // Also clean up empty directories after removing files
512 if removed > 0 {
513 if err := c.cleanEmptyDirs(); err != nil {
514 slog.Warn("[IMAGE-PROXY] failed to clean empty directories",
515 "error", err,
516 )
517 }
518 }
519
520 // Log activity or heartbeat every 6 cycles (6 hours if interval is 1h)
521 if removed > 0 {
522 slog.Info("[IMAGE-PROXY] cache cleanup completed",
523 "entries_removed", removed,
524 "cycle", cycleCount,
525 )
526 } else if cycleCount%6 == 0 {
527 // Get cache size for heartbeat log
528 size, _ := c.GetCacheSize()
529 slog.Debug("[IMAGE-PROXY] cache cleanup heartbeat",
530 "cycle", cycleCount,
531 "cache_size_bytes", size,
532 )
533 }
534 }
535 }
536 }()
537
538 return cancel
539}