A community-based topic aggregation platform built on atproto
at main 539 lines 14 kB view raw
1package imageproxy 2 3import ( 4 "context" 5 "errors" 6 "io/fs" 7 "log/slog" 8 "os" 9 "path/filepath" 10 "sort" 11 "strings" 12 "time" 13) 14 15var ( 16 // ErrEmptyParameter is returned when a required parameter is empty 17 ErrEmptyParameter = errors.New("required parameter is empty") 18 // ErrInvalidCacheBasePath is returned when the cache base path is empty 19 ErrInvalidCacheBasePath = errors.New("cache base path cannot be empty") 20 // ErrInvalidCacheMaxSize is returned when maxSizeGB is not positive 21 ErrInvalidCacheMaxSize = errors.New("cache max size must be positive") 22) 23 24// Cache defines the interface for image proxy caching 25type Cache interface { 26 // Get retrieves cached image data for the given preset, DID, and CID. 27 // Returns the data, whether it was found, and any error. 28 Get(preset, did, cid string) ([]byte, bool, error) 29 30 // Set stores image data in the cache for the given preset, DID, and CID. 31 Set(preset, did, cid string, data []byte) error 32 33 // Delete removes cached image data for the given preset, DID, and CID. 34 Delete(preset, did, cid string) error 35 36 // Cleanup runs both LRU eviction and TTL cleanup. 37 // Returns the number of entries removed and any error. 38 Cleanup() (int, error) 39} 40 41// DiskCache implements Cache using the filesystem for storage. 42// Cache key format: {basePath}/{preset}/{did_safe}/{cid} 43// where did_safe has colons replaced with underscores for filesystem safety. 44type DiskCache struct { 45 basePath string 46 maxSizeGB int 47 ttlDays int 48} 49 50// NewDiskCache creates a new DiskCache with the specified base path, maximum size, and TTL. 51// Returns an error if basePath is empty or maxSizeGB is not positive. 52// ttlDays of 0 disables TTL-based cleanup (only LRU eviction applies). 
53func NewDiskCache(basePath string, maxSizeGB int, ttlDays int) (*DiskCache, error) { 54 if basePath == "" { 55 return nil, ErrInvalidCacheBasePath 56 } 57 if maxSizeGB <= 0 { 58 return nil, ErrInvalidCacheMaxSize 59 } 60 if ttlDays < 0 { 61 return nil, errors.New("ttlDays cannot be negative") 62 } 63 return &DiskCache{ 64 basePath: basePath, 65 maxSizeGB: maxSizeGB, 66 ttlDays: ttlDays, 67 }, nil 68} 69 70// makeDIDSafe converts a DID to a filesystem-safe directory name. 71// It sanitizes the input to prevent path traversal attacks by: 72// - Replacing colons with underscores 73// - Removing path separators (/ and \) 74// - Removing path traversal sequences (..) 75// - Removing null bytes 76func makeDIDSafe(did string) string { 77 // Replace colons with underscores 78 s := strings.ReplaceAll(did, ":", "_") 79 80 // Remove path separators to prevent directory escape 81 s = strings.ReplaceAll(s, "/", "") 82 s = strings.ReplaceAll(s, "\\", "") 83 84 // Remove path traversal sequences 85 s = strings.ReplaceAll(s, "..", "") 86 87 // Remove null bytes (could be used to terminate strings early) 88 s = strings.ReplaceAll(s, "\x00", "") 89 90 return s 91} 92 93// makeCIDSafe sanitizes a CID for use in filesystem paths. 94// It removes characters that could be used for path traversal attacks. 95func makeCIDSafe(cid string) string { 96 // Remove path separators to prevent directory escape 97 s := strings.ReplaceAll(cid, "/", "") 98 s = strings.ReplaceAll(s, "\\", "") 99 100 // Remove path traversal sequences 101 s = strings.ReplaceAll(s, "..", "") 102 103 // Remove null bytes 104 s = strings.ReplaceAll(s, "\x00", "") 105 106 return s 107} 108 109// makePresetSafe sanitizes a preset name for use in filesystem paths. 
110func makePresetSafe(preset string) string { 111 // Remove path separators 112 s := strings.ReplaceAll(preset, "/", "") 113 s = strings.ReplaceAll(s, "\\", "") 114 115 // Remove path traversal sequences 116 s = strings.ReplaceAll(s, "..", "") 117 118 // Remove null bytes 119 s = strings.ReplaceAll(s, "\x00", "") 120 121 return s 122} 123 124// cachePath constructs the full filesystem path for a cached item. 125// All components are sanitized to prevent path traversal attacks. 126func (c *DiskCache) cachePath(preset, did, cid string) string { 127 presetSafe := makePresetSafe(preset) 128 didSafe := makeDIDSafe(did) 129 cidSafe := makeCIDSafe(cid) 130 return filepath.Join(c.basePath, presetSafe, didSafe, cidSafe) 131} 132 133// validateParams checks that all required parameters are non-empty. 134func validateParams(preset, did, cid string) error { 135 if preset == "" || did == "" || cid == "" { 136 return ErrEmptyParameter 137 } 138 return nil 139} 140 141// Get retrieves cached image data for the given preset, DID, and CID. 142// Returns the data, whether it was found, and any error. 143// If the item is not in cache, returns (nil, false, nil). 144// Updates the file's modification time on access for LRU tracking. 
145func (c *DiskCache) Get(preset, did, cid string) ([]byte, bool, error) { 146 if err := validateParams(preset, did, cid); err != nil { 147 return nil, false, err 148 } 149 150 path := c.cachePath(preset, did, cid) 151 152 data, err := os.ReadFile(path) 153 if err != nil { 154 if os.IsNotExist(err) { 155 return nil, false, nil 156 } 157 return nil, false, err 158 } 159 160 // Update mtime for LRU tracking 161 // Log errors as warnings since failed mtime updates degrade LRU accuracy 162 now := time.Now() 163 if chtimesErr := os.Chtimes(path, now, now); chtimesErr != nil { 164 slog.Warn("[IMAGE-PROXY] failed to update mtime for LRU tracking", 165 "path", path, 166 "error", chtimesErr, 167 ) 168 } 169 170 return data, true, nil 171} 172 173// Set stores image data in the cache for the given preset, DID, and CID. 174// Creates necessary directories if they don't exist. 175func (c *DiskCache) Set(preset, did, cid string, data []byte) error { 176 if err := validateParams(preset, did, cid); err != nil { 177 return err 178 } 179 180 path := c.cachePath(preset, did, cid) 181 dir := filepath.Dir(path) 182 183 // Create directory structure if it doesn't exist 184 if err := os.MkdirAll(dir, 0755); err != nil { 185 return err 186 } 187 188 // Write the file atomically by writing to a temp file first 189 // then renaming (to avoid partial writes on crash) 190 tmpPath := path + ".tmp" 191 if err := os.WriteFile(tmpPath, data, 0644); err != nil { 192 return err 193 } 194 195 return os.Rename(tmpPath, path) 196} 197 198// Delete removes cached image data for the given preset, DID, and CID. 199// Returns nil if the item doesn't exist (idempotent delete). 
200func (c *DiskCache) Delete(preset, did, cid string) error { 201 if err := validateParams(preset, did, cid); err != nil { 202 return err 203 } 204 205 path := c.cachePath(preset, did, cid) 206 207 err := os.Remove(path) 208 if err != nil && !os.IsNotExist(err) { 209 return err 210 } 211 212 return nil 213} 214 215// cacheEntry represents a cached file with its metadata. 216type cacheEntry struct { 217 path string 218 size int64 219 modTime time.Time 220} 221 222// scanCache walks the cache directory and returns all cache entries. 223func (c *DiskCache) scanCache() ([]cacheEntry, int64, error) { 224 var entries []cacheEntry 225 var totalSize int64 226 227 err := filepath.WalkDir(c.basePath, func(path string, d fs.DirEntry, err error) error { 228 if err != nil { 229 return err 230 } 231 if d.IsDir() { 232 return nil 233 } 234 235 info, err := d.Info() 236 if err != nil { 237 slog.Warn("[IMAGE-PROXY] failed to stat file during cache scan, cache size may be inaccurate", 238 "path", path, 239 "error", err, 240 ) 241 return nil // Skip files we can't stat 242 } 243 244 entries = append(entries, cacheEntry{ 245 path: path, 246 size: info.Size(), 247 modTime: info.ModTime(), 248 }) 249 totalSize += info.Size() 250 251 return nil 252 }) 253 254 if err != nil && !os.IsNotExist(err) { 255 return nil, 0, err 256 } 257 258 return entries, totalSize, nil 259} 260 261// GetCacheSize returns the current cache size in bytes. 262func (c *DiskCache) GetCacheSize() (int64, error) { 263 _, totalSize, err := c.scanCache() 264 return totalSize, err 265} 266 267// EvictLRU removes the least recently used entries until the cache is under the size limit. 268// Returns the number of entries removed. 
269func (c *DiskCache) EvictLRU() (int, error) { 270 entries, totalSize, err := c.scanCache() 271 if err != nil { 272 return 0, err 273 } 274 275 maxSizeBytes := int64(c.maxSizeGB) * 1024 * 1024 * 1024 276 if totalSize <= maxSizeBytes { 277 return 0, nil // Under limit, nothing to do 278 } 279 280 // Sort by modification time (oldest first for LRU) 281 sort.Slice(entries, func(i, j int) bool { 282 return entries[i].modTime.Before(entries[j].modTime) 283 }) 284 285 removed := 0 286 for _, entry := range entries { 287 if totalSize <= maxSizeBytes { 288 break 289 } 290 291 if err := os.Remove(entry.path); err != nil { 292 if !os.IsNotExist(err) { 293 slog.Warn("[IMAGE-PROXY] failed to remove cache entry during LRU eviction", 294 "path", entry.path, 295 "error", err, 296 ) 297 } 298 continue 299 } 300 301 totalSize -= entry.size 302 removed++ 303 304 slog.Debug("[IMAGE-PROXY] evicted cache entry (LRU)", 305 "path", entry.path, 306 "size_bytes", entry.size, 307 ) 308 } 309 310 if removed > 0 { 311 slog.Info("[IMAGE-PROXY] LRU eviction completed", 312 "entries_removed", removed, 313 "new_size_bytes", totalSize, 314 "max_size_bytes", maxSizeBytes, 315 ) 316 } 317 318 return removed, nil 319} 320 321// CleanExpired removes cache entries older than the configured TTL. 322// Returns the number of entries removed. 323// If TTL is 0 (disabled), returns 0 without scanning. 
324func (c *DiskCache) CleanExpired() (int, error) { 325 if c.ttlDays <= 0 { 326 return 0, nil // TTL disabled 327 } 328 329 entries, _, err := c.scanCache() 330 if err != nil { 331 return 0, err 332 } 333 334 cutoff := time.Now().AddDate(0, 0, -c.ttlDays) 335 removed := 0 336 337 for _, entry := range entries { 338 if entry.modTime.After(cutoff) { 339 continue // Not expired 340 } 341 342 if err := os.Remove(entry.path); err != nil { 343 if !os.IsNotExist(err) { 344 slog.Warn("[IMAGE-PROXY] failed to remove expired cache entry", 345 "path", entry.path, 346 "mod_time", entry.modTime, 347 "error", err, 348 ) 349 } 350 continue 351 } 352 353 removed++ 354 355 slog.Debug("[IMAGE-PROXY] removed expired cache entry", 356 "path", entry.path, 357 "mod_time", entry.modTime, 358 "ttl_days", c.ttlDays, 359 ) 360 } 361 362 if removed > 0 { 363 slog.Info("[IMAGE-PROXY] TTL cleanup completed", 364 "entries_removed", removed, 365 "ttl_days", c.ttlDays, 366 ) 367 } 368 369 return removed, nil 370} 371 372// Cleanup runs both TTL cleanup and LRU eviction. 373// TTL cleanup runs first (removes definitely stale entries), 374// then LRU eviction runs if still over size limit. 375// Returns the total number of entries removed. 376func (c *DiskCache) Cleanup() (int, error) { 377 totalRemoved := 0 378 379 // First, remove expired entries (definitely stale) 380 ttlRemoved, err := c.CleanExpired() 381 if err != nil { 382 return 0, err 383 } 384 totalRemoved += ttlRemoved 385 386 // Then, run LRU eviction if still over limit 387 lruRemoved, err := c.EvictLRU() 388 if err != nil { 389 return totalRemoved, err 390 } 391 totalRemoved += lruRemoved 392 393 return totalRemoved, nil 394} 395 396// cleanEmptyDirs removes empty directories under the cache base path. 397// This is useful after eviction/cleanup to remove orphaned preset/DID directories. 
398func (c *DiskCache) cleanEmptyDirs() error { 399 // Walk in reverse depth order to clean leaf directories first 400 var dirs []string 401 402 var walkErrors []error 403 err := filepath.WalkDir(c.basePath, func(path string, d fs.DirEntry, err error) error { 404 if err != nil { 405 // Log WalkDir errors but continue scanning to clean as much as possible 406 slog.Warn("[IMAGE-PROXY] error during empty dir cleanup scan", 407 "path", path, 408 "error", err, 409 ) 410 walkErrors = append(walkErrors, err) 411 return nil // Continue scanning despite errors 412 } 413 if d.IsDir() && path != c.basePath { 414 dirs = append(dirs, path) 415 } 416 return nil 417 }) 418 419 if err != nil { 420 return err 421 } 422 423 if len(walkErrors) > 0 { 424 slog.Warn("[IMAGE-PROXY] encountered errors during empty dir cleanup scan", 425 "error_count", len(walkErrors), 426 ) 427 } 428 429 // Sort by length descending (deepest paths first) 430 sort.Slice(dirs, func(i, j int) bool { 431 return len(dirs[i]) > len(dirs[j]) 432 }) 433 434 var removeErrors int 435 for _, dir := range dirs { 436 entries, err := os.ReadDir(dir) 437 if err != nil { 438 slog.Warn("[IMAGE-PROXY] failed to read directory during cleanup", 439 "path", dir, 440 "error", err, 441 ) 442 continue 443 } 444 if len(entries) == 0 { 445 if removeErr := os.Remove(dir); removeErr != nil { 446 slog.Warn("[IMAGE-PROXY] failed to remove empty directory", 447 "path", dir, 448 "error", removeErr, 449 ) 450 removeErrors++ 451 } 452 } 453 } 454 455 if removeErrors > 0 { 456 slog.Warn("[IMAGE-PROXY] some empty directories could not be removed", 457 "failed_count", removeErrors, 458 ) 459 } 460 461 return nil 462} 463 464// StartCleanupJob starts a background goroutine that periodically runs cache cleanup. 465// Returns a cancel function that should be called during graceful shutdown. 466// If interval is 0 or negative, no cleanup job is started and the cancel function is a no-op. 
467func (c *DiskCache) StartCleanupJob(interval time.Duration) context.CancelFunc { 468 if interval <= 0 { 469 slog.Info("[IMAGE-PROXY] cache cleanup job disabled (interval=0)") 470 return func() {} // No-op cancel 471 } 472 473 ctx, cancel := context.WithCancel(context.Background()) 474 475 go func() { 476 defer func() { 477 if r := recover(); r != nil { 478 slog.Error("[IMAGE-PROXY] CRITICAL: cache cleanup job panicked", 479 "panic", r, 480 ) 481 } 482 }() 483 484 ticker := time.NewTicker(interval) 485 defer ticker.Stop() 486 487 slog.Info("[IMAGE-PROXY] cache cleanup job started", 488 "interval", interval, 489 "ttl_days", c.ttlDays, 490 "max_size_gb", c.maxSizeGB, 491 ) 492 493 cycleCount := 0 494 for { 495 select { 496 case <-ctx.Done(): 497 slog.Info("[IMAGE-PROXY] cache cleanup job stopped") 498 return 499 case <-ticker.C: 500 cycleCount++ 501 502 removed, err := c.Cleanup() 503 if err != nil { 504 slog.Error("[IMAGE-PROXY] cache cleanup error", 505 "error", err, 506 "cycle", cycleCount, 507 ) 508 continue 509 } 510 511 // Also clean up empty directories after removing files 512 if removed > 0 { 513 if err := c.cleanEmptyDirs(); err != nil { 514 slog.Warn("[IMAGE-PROXY] failed to clean empty directories", 515 "error", err, 516 ) 517 } 518 } 519 520 // Log activity or heartbeat every 6 cycles (6 hours if interval is 1h) 521 if removed > 0 { 522 slog.Info("[IMAGE-PROXY] cache cleanup completed", 523 "entries_removed", removed, 524 "cycle", cycleCount, 525 ) 526 } else if cycleCount%6 == 0 { 527 // Get cache size for heartbeat log 528 size, _ := c.GetCacheSize() 529 slog.Debug("[IMAGE-PROXY] cache cleanup heartbeat", 530 "cycle", cycleCount, 531 "cache_size_bytes", size, 532 ) 533 } 534 } 535 } 536 }() 537 538 return cancel 539}