// Package cache keeps a bounded set of git repository clones on disk and
// evicts the least recently used clone when the set is full.
package cache

import (
	"container/list"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"sync"

	"git-summarizer/pkg/git"
	"github.com/go-git/go-git/v5/plumbing/transport"
)

// RepoCache manages cached git repositories with LRU eviction.
//
// Locking: mu guards repoLocks, lru and repos. In addition every URL has its
// own mutex in repoLocks; GetOrClone holds it for the whole open/fetch/clone
// sequence so that work on one URL is serialized without blocking other URLs.
//
// NOTE(review): eviction deletes a clone directory without taking that URL's
// per-repo lock, so a *git.Repo handed out earlier for the evicted URL may
// refer to a directory that no longer exists. Confirm callers tolerate this.
type RepoCache struct {
	baseDir  string // directory under which every clone is stored
	maxRepos int    // capacity used by the eviction loop

	mu        sync.Mutex
	repoLocks map[string]*sync.Mutex // per-URL locks; never removed by the code in this file
	lru       *list.List             // URLs (string values), most recently used at the front
	repos     map[string]*cacheEntry // URL -> entry
}

// cacheEntry describes one cached clone.
type cacheEntry struct {
	url     string        // repository URL, also the key in RepoCache.repos
	path    string        // on-disk location of the clone
	element *list.Element // this entry's node in RepoCache.lru
}

// New creates a new RepoCache that stores clones under baseDir and tracks at
// most maxRepos of them. A maxRepos below 1 leaves room only for the repo
// currently being added.
func New(baseDir string, maxRepos int) *RepoCache {
	return &RepoCache{
		baseDir:   baseDir,
		maxRepos:  maxRepos,
		repoLocks: make(map[string]*sync.Mutex),
		lru:       list.New(),
		repos:     make(map[string]*cacheEntry),
	}
}

// GetOrClone returns a cached repo or clones it if not present.
//
// If the repo is cached it is marked as most recently used, opened, and
// fetched before being returned. A fetch failure is only logged, so the
// returned repo may be stale. If the cached copy cannot be opened, the entry
// is dropped and the repo is cloned again.
func (c *RepoCache) GetOrClone(url string, auth transport.AuthMethod) (*git.Repo, error) {
	// Serialize all work on this URL.
	repoLock := c.getRepoLock(url)
	repoLock.Lock()
	defer repoLock.Unlock()

	c.mu.Lock()
	entry, exists := c.repos[url]
	if exists {
		c.lru.MoveToFront(entry.element)
	}
	c.mu.Unlock()

	if !exists {
		return c.cloneNew(url, auth)
	}

	slog.Info("fetching cached repo", "url", url)
	repo, err := git.Open(entry.path)
	if err != nil {
		// Cache entry invalid: forget it (and its directory) and start over.
		slog.Warn("cached repo invalid, re-cloning", "url", url, "error", err)
		c.mu.Lock()
		c.removeEntry(url)
		c.mu.Unlock()
		return c.cloneNew(url, auth)
	}

	if err := repo.Fetch(auth); err != nil {
		// Continue with potentially stale data rather than failing.
		slog.Warn("fetch failed", "url", url, "error", err)
	}
	return repo, nil
}

// cloneNew clones a repo into the cache directory and registers it.
//
// c.mu is released while the clone runs, so capacity is enforced twice: once
// up front to free disk space, and again when the entry is inserted, because
// other goroutines may have added repos in the meantime.
func (c *RepoCache) cloneNew(url string, auth transport.AuthMethod) (*git.Repo, error) {
	c.mu.Lock()
	c.makeRoom()
	path := c.urlToPath(url)
	c.mu.Unlock()

	// Ensure cache directory exists.
	if err := os.MkdirAll(c.baseDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create cache dir: %w", err)
	}

	slog.Info("cloning repo to cache", "url", url, "path", path)
	repo, err := git.Clone(url, path, auth)
	if err != nil {
		// The directory is not tracked by the cache, so nothing would ever
		// evict it; remove whatever the failed clone left behind.
		if rmErr := os.RemoveAll(path); rmErr != nil {
			slog.Warn("failed to remove partial clone", "path", path, "error", rmErr)
		}
		return nil, err
	}

	c.mu.Lock()
	c.makeRoom()
	entry := &cacheEntry{url: url, path: path}
	entry.element = c.lru.PushFront(url)
	c.repos[url] = entry
	size := c.lru.Len() // read under the lock; logged after releasing it
	c.mu.Unlock()

	slog.Info("repo cached", "url", url, "cache_size", size)
	return repo, nil
}

// makeRoom evicts least recently used repos until one more entry fits.
// Must be called with c.mu held.
func (c *RepoCache) makeRoom() {
	// The Len() > 0 guard ends the loop once nothing is left to evict;
	// without it a maxRepos of zero or less would spin forever.
	for c.lru.Len() > 0 && c.lru.Len() >= c.maxRepos {
		c.evictLRU()
	}
}

// getRepoLock returns the lock for a specific repo URL, creating it on first
// use.
func (c *RepoCache) getRepoLock(url string) *sync.Mutex {
	c.mu.Lock()
	defer c.mu.Unlock()

	lock, exists := c.repoLocks[url]
	if !exists {
		lock = &sync.Mutex{}
		c.repoLocks[url] = lock
	}
	return lock
}

// evictLRU removes the least recently used repo from the cache.
// It does nothing when the cache is empty.
// Must be called with c.mu held.
func (c *RepoCache) evictLRU() {
	elem := c.lru.Back()
	if elem == nil {
		return
	}

	url := elem.Value.(string)
	c.removeEntry(url)
	slog.Info("evicted repo from cache", "url", url)
}

// removeEntry removes a repo from the LRU list, the lookup map, and disk.
// Unknown URLs are ignored. A failure to delete the directory is logged, and
// the entry is still dropped from the in-memory structures.
// Must be called with c.mu held.
func (c *RepoCache) removeEntry(url string) {
	entry, exists := c.repos[url]
	if !exists {
		return
	}

	c.lru.Remove(entry.element)
	delete(c.repos, url)

	if err := os.RemoveAll(entry.path); err != nil {
		slog.Warn("failed to remove cached repo", "path", entry.path, "error", err)
	}
}

// urlToPath converts a repo URL to a filesystem-safe path under baseDir.
// The directory name is the hex encoding of the first 8 bytes of the URL's
// SHA-256 digest (16 hex characters).
func (c *RepoCache) urlToPath(url string) string {
	hash := sha256.Sum256([]byte(url))
	hashStr := hex.EncodeToString(hash[:8])
	return filepath.Join(c.baseDir, hashStr)
}

// Size returns the number of cached repos.
func (c *RepoCache) Size() int {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.lru.Len()
}