package connectors

import (
	"context"
	"encoding/base64"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/google/go-github/v58/github"
	"github.com/yourusername/markedit/internal/database"
	"github.com/yourusername/markedit/internal/markdown"
	"golang.org/x/oauth2"
	"golang.org/x/sync/singleflight"
)

// fileTreeCache is an in-memory cache for file tree data with TTL-based
// expiry. When full, the entry with the oldest fetch time is evicted
// (oldest-fetched-first, i.e. FIFO — not true LRU, since Get does not
// refresh recency).
type fileTreeCache struct {
	mu      sync.RWMutex
	entries map[string]*cacheEntry
	maxSize int
	ttl     time.Duration
	sfGroup singleflight.Group // Deduplicates concurrent fetches of the same key
}

// cacheEntry is a single cached file tree plus its expiry metadata.
type cacheEntry struct {
	data      *FileNode
	expiresAt time.Time
	etag      string // For future ETag support
	fetchedAt time.Time
}

// globalCache is the process-wide file tree cache, tunable via
// FILE_TREE_CACHE_TTL and FILE_TREE_CACHE_SIZE (see init).
var globalCache = &fileTreeCache{
	entries: make(map[string]*cacheEntry),
	maxSize: 100,
	ttl:     5 * time.Minute,
}

func init() {
	// Configure the cache from the environment. Unparseable or
	// non-positive values are ignored so the defaults above always leave
	// the cache in a usable state (size <= 0 would make every Set evict).
	if ttlStr := os.Getenv("FILE_TREE_CACHE_TTL"); ttlStr != "" {
		if ttl, err := time.ParseDuration(ttlStr); err == nil && ttl > 0 {
			globalCache.ttl = ttl
		}
	}
	if sizeStr := os.Getenv("FILE_TREE_CACHE_SIZE"); sizeStr != "" {
		if size, err := strconv.Atoi(sizeStr); err == nil && size > 0 {
			globalCache.maxSize = size
		}
	}
}

// Get returns the cached tree for key if present and not expired.
// Expired entries are left in place; they are reclaimed by eviction or
// overwritten by a later Set.
func (c *fileTreeCache) Get(key string) (*FileNode, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	entry, exists := c.entries[key]
	if !exists || time.Now().After(entry.expiresAt) {
		return nil, false
	}
	return entry.data, true
}

// Set stores data under key with the configured TTL, evicting the
// oldest-fetched entry first when the cache is at capacity.
func (c *fileTreeCache) Set(key string, data *FileNode) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if len(c.entries) >= c.maxSize {
		c.evictOldest()
	}

	now := time.Now()
	c.entries[key] = &cacheEntry{
		data:      data,
		expiresAt: now.Add(c.ttl),
		fetchedAt: now,
	}
}

// Invalidate removes a specific cache entry.
func (c *fileTreeCache) Invalidate(key string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	delete(c.entries, key)
}

// InvalidatePattern removes all entries whose key matches pattern
// (e.g. "owner/repo/*") and returns the number of entries removed.
func (c *fileTreeCache) InvalidatePattern(pattern string) int {
	c.mu.Lock()
	defer c.mu.Unlock()

	count := 0
	for key := range c.entries {
		if matchesPattern(key, pattern) {
			delete(c.entries, key)
			count++
		}
	}
	return count
}

// evictOldest removes the entry with the oldest fetch time. Callers must
// hold c.mu.
func (c *fileTreeCache) evictOldest() {
	var oldestKey string
	var oldestTime time.Time
	for key, entry := range c.entries {
		if oldestKey == "" || entry.fetchedAt.Before(oldestTime) {
			oldestKey = key
			oldestTime = entry.fetchedAt
		}
	}
	if oldestKey != "" {
		delete(c.entries, oldestKey)
	}
}

// generateCacheKey builds the canonical cache key
// "owner/repo/branch/path/ext1,ext2" for a file tree request.
func generateCacheKey(owner, repo, branch, path string, extensions []string) string {
	extStr := strings.Join(extensions, ",")
	return fmt.Sprintf("%s/%s/%s/%s/%s", owner, repo, branch, path, extStr)
}

// matchesPattern reports whether key matches pattern. A pattern ending in
// "/*" matches the prefix up to a path-segment boundary; any other pattern
// must match exactly.
func matchesPattern(key, pattern string) bool {
	if strings.HasSuffix(pattern, "/*") {
		prefix := strings.TrimSuffix(pattern, "/*")
		// Require a segment boundary so "owner/repo/*" does not also
		// match keys belonging to "owner/repository2".
		return key == prefix || strings.HasPrefix(key, prefix+"/")
	}
	return key == pattern
}

// GitHubConnector implements the Connector interface for GitHub.
type GitHubConnector struct {
	client *github.Client
	db     *database.DB // For cache-event logging; may be nil
	userID *int         // For cache-event logging; may be nil
}

// NewGitHubConnector creates a new GitHub connector whose API client
// authenticates with the given OAuth access token. db and userID are only
// used for cache-event logging and may be nil.
func NewGitHubConnector(accessToken string, db *database.DB, userID *int) *GitHubConnector {
	ctx := context.Background()
	ts := oauth2.StaticTokenSource(
		&oauth2.Token{AccessToken: accessToken},
	)
	tc := oauth2.NewClient(ctx, ts)
	client := github.NewClient(tc)

	return &GitHubConnector{
		client: client,
		db:     db,
		userID: userID,
	}
}

// GetType returns the connector type identifier.
func (g *GitHubConnector) GetType() string {
	return "github"
}

// logCacheEvent records a cache hit/miss in the database. Logging is
// best-effort: failures are deliberately ignored and a nil db is a no-op.
func (g *GitHubConnector) logCacheEvent(cacheKey, eventType string, responseTimeMs int) {
	if g.db != nil {
		_ = g.db.LogCacheEvent(cacheKey, "file_tree", eventType, g.userID, responseTimeMs)
	}
}
ListRepositories lists all repositories for the authenticated user func (g *GitHubConnector) ListRepositories(ctx context.Context, sortBy string) ([]Repository, error) { // Map sort parameter sort := "updated" switch sortBy { case "created": sort = "created" case "name": sort = "full_name" default: sort = "updated" } opts := &github.RepositoryListOptions{ Sort: sort, Direction: "desc", Affiliation: "owner,collaborator,organization_member", // Include all repos user has access to ListOptions: github.ListOptions{ PerPage: 100, }, } var allRepos []Repository for { repos, resp, err := g.client.Repositories.List(ctx, "", opts) if err != nil { return nil, fmt.Errorf("failed to list repositories (status: %v): %w", resp.StatusCode, err) } for _, repo := range repos { allRepos = append(allRepos, Repository{ ID: repo.GetID(), FullName: repo.GetFullName(), Name: repo.GetName(), Owner: repo.GetOwner().GetLogin(), Private: repo.GetPrivate(), DefaultBranch: repo.GetDefaultBranch(), UpdatedAt: repo.GetUpdatedAt().Time, }) } if resp.NextPage == 0 { break } opts.Page = resp.NextPage } return allRepos, nil } // ListFiles lists files in a repository path using GitHub Tree API with caching func (g *GitHubConnector) ListFiles(ctx context.Context, owner, repo, path, branch string, extensions []string) (*FileNode, error) { // Resolve default branch first to ensure consistent cache keys if branch == "" { repository, _, err := g.client.Repositories.Get(ctx, owner, repo) if err != nil { return nil, fmt.Errorf("failed to get repository: %w", err) } branch = repository.GetDefaultBranch() } cacheKey := generateCacheKey(owner, repo, branch, path, extensions) // Try cache first if cached, found := globalCache.Get(cacheKey); found { g.logCacheEvent(cacheKey, "hit", 0) return cached, nil } // Use singleflight to deduplicate concurrent requests result, err, _ := globalCache.sfGroup.Do(cacheKey, func() (interface{}, error) { startTime := time.Now() // Fetch from GitHub Tree API data, err := 
g.fetchTreeFromGitHub(ctx, owner, repo, branch, path, extensions) if err != nil { return nil, err } responseTime := int(time.Since(startTime).Milliseconds()) // Store in cache globalCache.Set(cacheKey, data) g.logCacheEvent(cacheKey, "miss", responseTime) return data, nil }) if err != nil { return nil, err } return result.(*FileNode), nil } // fetchTreeFromGitHub fetches file tree from GitHub using Tree API func (g *GitHubConnector) fetchTreeFromGitHub(ctx context.Context, owner, repo, branch, path string, extensions []string) (*FileNode, error) { // Step 1: Get branch to get commit SHA branchRef, _, err := g.client.Repositories.GetBranch(ctx, owner, repo, branch, 0) if err != nil { return nil, fmt.Errorf("failed to get branch: %w", err) } commitSHA := branchRef.GetCommit().GetSHA() // Step 2: Fetch tree recursively (single API call) tree, _, err := g.client.Git.GetTree(ctx, owner, repo, commitSHA, true) if err != nil { return nil, fmt.Errorf("failed to get tree: %w", err) } // Check for truncated response if tree.Truncated != nil && *tree.Truncated { return nil, fmt.Errorf("repository tree is truncated (>100k entries) - repository too large") } // Step 3: Filter tree entries by path and extensions var filteredEntries []*github.TreeEntry for _, entry := range tree.Entries { // Skip if not in requested path if path != "" && !strings.HasPrefix(*entry.Path, path) { continue } // Filter by extensions (only for blobs/files) if len(extensions) > 0 && *entry.Type == "blob" { if !matchesExtensions(*entry.Path, extensions) { continue } } filteredEntries = append(filteredEntries, entry) } // Step 4: Build hierarchical FileNode tree from flat structure root := buildFileTree(filteredEntries, path, extensions, repo) return root, nil } // matchesExtensions checks if file matches extension filter func matchesExtensions(filename string, extensions []string) bool { if len(extensions) == 0 { return true } ext := strings.TrimPrefix(filepath.Ext(filename), ".") for _, e := range 
extensions { if strings.EqualFold(ext, e) { return true } } return false } // buildFileTree converts flat GitHub tree entries to hierarchical FileNode structure func buildFileTree(entries []*github.TreeEntry, rootPath string, extensions []string, repoName string) *FileNode { root := &FileNode{ Name: repoName, Path: rootPath, Type: "directory", IsDir: true, Children: []*FileNode{}, } if rootPath != "" { root.Name = filepath.Base(rootPath) } // Group entries by their first path component pathGroups := make(map[string][]*github.TreeEntry) for _, entry := range entries { relativePath := *entry.Path if rootPath != "" { relativePath = strings.TrimPrefix(relativePath, rootPath) relativePath = strings.TrimPrefix(relativePath, "/") } if relativePath == "" { continue } // Get first component parts := strings.SplitN(relativePath, "/", 2) firstComponent := parts[0] pathGroups[firstComponent] = append(pathGroups[firstComponent], entry) } // Build tree recursively for name, groupEntries := range pathGroups { // Check if this is a file or directory isFile := len(groupEntries) == 1 && *groupEntries[0].Type == "blob" && !strings.Contains(strings.TrimPrefix(*groupEntries[0].Path, rootPath+"/"), "/") if isFile { // It's a file filePath := *groupEntries[0].Path root.Children = append(root.Children, &FileNode{ Name: name, Path: filePath, Type: "file", IsDir: false, SHA: *groupEntries[0].SHA, Size: int64(*groupEntries[0].Size), }) } else { // It's a directory - collect all direct children dirNode := &FileNode{ Name: name, Path: filepath.Join(rootPath, name), Type: "directory", IsDir: true, Children: []*FileNode{}, } // Get direct children of this directory directChildren := make(map[string][]*github.TreeEntry) dirPrefix := dirNode.Path + "/" for _, entry := range groupEntries { relativeToDir := strings.TrimPrefix(*entry.Path, dirPrefix) parts := strings.SplitN(relativeToDir, "/", 2) directChildren[parts[0]] = append(directChildren[parts[0]], entry) } // Recursively build subtree subTree 
:= buildFileTree(groupEntries, dirNode.Path, extensions, name) dirNode.Children = subTree.Children // Only add directory if it has children (matching files) if len(dirNode.Children) > 0 { root.Children = append(root.Children, dirNode) } } } // Sort: directories first, then alphabetically sort.Slice(root.Children, func(i, j int) bool { if root.Children[i].IsDir != root.Children[j].IsDir { return root.Children[i].IsDir } return root.Children[i].Name < root.Children[j].Name }) return root } // GetFileContent retrieves the content of a file func (g *GitHubConnector) GetFileContent(ctx context.Context, owner, repo, path, branch string) (*FileContent, error) { if branch == "" { // Get default branch repository, _, err := g.client.Repositories.Get(ctx, owner, repo) if err != nil { return nil, fmt.Errorf("failed to get repository: %w", err) } branch = repository.GetDefaultBranch() } opts := &github.RepositoryContentGetOptions{ Ref: branch, } fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repo, path, opts) if err != nil { return nil, fmt.Errorf("failed to get file content: %w", err) } if fileContent == nil { return nil, fmt.Errorf("file not found") } content, err := fileContent.GetContent() if err != nil { return nil, fmt.Errorf("failed to decode content: %w", err) } // Parse markdown with frontmatter parsed, err := markdown.Parse(content) if err != nil { return nil, fmt.Errorf("failed to parse markdown: %w", err) } return &FileContent{ Content: parsed.Content, Frontmatter: parsed.Frontmatter, Path: path, SHA: fileContent.GetSHA(), Branch: branch, }, nil } // InvalidateCacheForRepo invalidates all cache entries for a repository func InvalidateCacheForRepo(owner, repo string) int { pattern := fmt.Sprintf("%s/%s/*", owner, repo) return globalCache.InvalidatePattern(pattern) } // CreateFile creates a new file in the repository func (g *GitHubConnector) CreateFile(ctx context.Context, owner, repo, path, content, message string) error { if message == "" 
{ message = fmt.Sprintf("Create %s", path) } // Get default branch repository, _, err := g.client.Repositories.Get(ctx, owner, repo) if err != nil { return fmt.Errorf("failed to get repository: %w", err) } branch := repository.GetDefaultBranch() // Encode content to base64 encodedContent := github.String(base64.StdEncoding.EncodeToString([]byte(content))) // Create file options opts := &github.RepositoryContentFileOptions{ Message: github.String(message), Content: []byte(*encodedContent), Branch: github.String(branch), } // Create the file _, _, err = g.client.Repositories.CreateFile(ctx, owner, repo, path, opts) if err != nil { return fmt.Errorf("failed to create file: %w", err) } return nil } // RenameItem renames a file or folder by deleting the old path and creating at the new path func (g *GitHubConnector) RenameItem(ctx context.Context, owner, repo, oldPath, newPath, message string) error { if message == "" { message = fmt.Sprintf("Rename %s to %s", oldPath, newPath) } // Get default branch repository, _, err := g.client.Repositories.Get(ctx, owner, repo) if err != nil { return fmt.Errorf("failed to get repository: %w", err) } branch := repository.GetDefaultBranch() // Get the content of the old file/path fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repo, oldPath, &github.RepositoryContentGetOptions{ Ref: branch, }) if err != nil { return fmt.Errorf("failed to get file content: %w", err) } // If it's a directory, we need to handle all files in it if fileContent == nil { // It's a directory - get all files in it _, directoryContent, _, err := g.client.Repositories.GetContents(ctx, owner, repo, oldPath, &github.RepositoryContentGetOptions{ Ref: branch, }) if err != nil { return fmt.Errorf("failed to get directory contents: %w", err) } // Rename each file in the directory for _, item := range directoryContent { if item.GetType() == "file" { oldFilePath := item.GetPath() newFilePath := strings.Replace(oldFilePath, oldPath, newPath, 1) 
// Get file content content, err := item.GetContent() if err != nil { return fmt.Errorf("failed to get content for %s: %w", oldFilePath, err) } // Create new file encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) opts := &github.RepositoryContentFileOptions{ Message: github.String(message), Content: []byte(encodedContent), Branch: github.String(branch), } _, _, err = g.client.Repositories.CreateFile(ctx, owner, repo, newFilePath, opts) if err != nil { return fmt.Errorf("failed to create file %s: %w", newFilePath, err) } // Delete old file deleteOpts := &github.RepositoryContentFileOptions{ Message: github.String(message), SHA: github.String(item.GetSHA()), Branch: github.String(branch), } _, _, err = g.client.Repositories.DeleteFile(ctx, owner, repo, oldFilePath, deleteOpts) if err != nil { return fmt.Errorf("failed to delete file %s: %w", oldFilePath, err) } } } } else { // It's a single file content, err := fileContent.GetContent() if err != nil { return fmt.Errorf("failed to get file content: %w", err) } // Create new file at new path encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) opts := &github.RepositoryContentFileOptions{ Message: github.String(message), Content: []byte(encodedContent), Branch: github.String(branch), } _, _, err = g.client.Repositories.CreateFile(ctx, owner, repo, newPath, opts) if err != nil { return fmt.Errorf("failed to create file at new path: %w", err) } // Delete old file deleteOpts := &github.RepositoryContentFileOptions{ Message: github.String(message), SHA: github.String(fileContent.GetSHA()), Branch: github.String(branch), } _, _, err = g.client.Repositories.DeleteFile(ctx, owner, repo, oldPath, deleteOpts) if err != nil { return fmt.Errorf("failed to delete old file: %w", err) } } return nil }