cli + tui to publish to leaflet (wip) & manage tasks, notes & watch/read lists 🍃
charm leaflet readability golang
at main 251 lines 7.2 kB view raw
1//go:build !prod 2 3package tools 4 5import ( 6 "archive/tar" 7 "compress/gzip" 8 "context" 9 "encoding/json" 10 "fmt" 11 "io" 12 "net/http" 13 "os" 14 "path/filepath" 15 "strings" 16 "time" 17 18 "github.com/spf13/cobra" 19) 20 21// GitHubCommit represents a GitHub API commit response 22type GitHubCommit struct { 23 SHA string `json:"sha"` 24 Commit struct { 25 Message string `json:"message"` 26 } `json:"commit"` 27} 28 29// ArchiveConfig contains configuration for fetching and extracting archives 30type ArchiveConfig struct { 31 Repo string 32 Path string 33 Output string 34 SHA string 35 FormatJSON bool 36} 37 38// NewGHRepoCommand creates a command for fetching GitHub repository archives 39func NewGHRepoCommand() *cobra.Command { 40 var config ArchiveConfig 41 42 cmd := &cobra.Command{ 43 Use: "gh-repo", 44 Short: "Fetch and extract files from a GitHub repository archive", 45 Long: `Fetches a GitHub repository archive (tarball), extracts specific paths, 46and optionally formats JSON files using Go's standard library. 
47 48This is useful for syncing lexicons, schemas, or other data files from GitHub repositories.`, 49 Example: ` # Fetch lexicons from a specific path 50 noteleaf tools fetch gh-repo \ 51 --repo hyperlink-academy/leaflet \ 52 --path lexicons/pub/leaflet/ \ 53 --output lexdocs/leaflet/ 54 55 # Fetch from a specific commit 56 noteleaf tools fetch gh-repo \ 57 --repo owner/repo \ 58 --path schemas/ \ 59 --output local/schemas/ \ 60 --sha abc123def`, 61 RunE: func(cmd *cobra.Command, args []string) error { 62 if config.Repo == "" { 63 return fmt.Errorf("--repo is required") 64 } 65 if config.Path == "" { 66 return fmt.Errorf("--path is required") 67 } 68 if config.Output == "" { 69 return fmt.Errorf("--output is required") 70 } 71 72 ctx := cmd.Context() 73 if ctx == nil { 74 ctx = context.Background() 75 } 76 77 return fetchAndExtractArchive(ctx, config, cmd.OutOrStdout()) 78 }, 79 } 80 81 cmd.Flags().StringVar(&config.Repo, "repo", "", "GitHub repository (owner/name)") 82 cmd.Flags().StringVar(&config.Path, "path", "", "Path within repository to extract") 83 cmd.Flags().StringVar(&config.Output, "output", "", "Output directory for extracted files") 84 cmd.Flags().StringVar(&config.SHA, "sha", "", "Specific commit SHA (default: latest)") 85 cmd.Flags().BoolVar(&config.FormatJSON, "format-json", true, "Format JSON files with indentation") 86 return cmd 87} 88 89// fetchAndExtractArchive fetches a GitHub archive and extracts specific paths 90func fetchAndExtractArchive(ctx context.Context, config ArchiveConfig, out io.Writer) error { 91 sha := config.SHA 92 if sha == "" { 93 var err error 94 sha, err = getLatestCommit(ctx, config.Repo, config.Path) 95 if err != nil { 96 return fmt.Errorf("failed to get latest commit: %w", err) 97 } 98 fmt.Fprintf(out, "Latest commit: %s\n", sha) 99 } 100 101 tmpDir, err := os.MkdirTemp("", "repo-archive-*") 102 if err != nil { 103 return fmt.Errorf("failed to create temp directory: %w", err) 104 } 105 defer os.RemoveAll(tmpDir) 106 107 
fmt.Fprintf(out, "Fetching archive for %s@%s\n", config.Repo, sha[:7]) 108 if err := downloadAndExtract(ctx, config.Repo, sha, config.Path, tmpDir, config.FormatJSON, out); err != nil { 109 return fmt.Errorf("failed to download and extract: %w", err) 110 } 111 112 fmt.Fprintf(out, "Writing README with source information\n") 113 readme := fmt.Sprintf("Source: https://github.com/%s/tree/%s/%s\n", config.Repo, sha, config.Path) 114 if err := os.WriteFile(filepath.Join(tmpDir, "README.md"), []byte(readme), 0o644); err != nil { 115 return fmt.Errorf("failed to write README: %w", err) 116 } 117 118 fmt.Fprintf(out, "Moving extracted files to %s\n", config.Output) 119 if err := os.RemoveAll(config.Output); err != nil { 120 return fmt.Errorf("failed to remove existing output directory: %w", err) 121 } 122 if err := os.Rename(tmpDir, config.Output); err != nil { 123 return fmt.Errorf("failed to move files to output directory: %w", err) 124 } 125 126 fmt.Fprintf(out, "Successfully extracted archive to %s\n", config.Output) 127 return nil 128} 129 130// getLatestCommit fetches the latest commit SHA for a given repository and path 131func getLatestCommit(ctx context.Context, repo, path string) (string, error) { 132 url := fmt.Sprintf("https://api.github.com/repos/%s/commits?path=%s&per_page=1", repo, path) 133 134 req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) 135 if err != nil { 136 return "", err 137 } 138 139 client := &http.Client{Timeout: 30 * time.Second} 140 resp, err := client.Do(req) 141 if err != nil { 142 return "", err 143 } 144 defer resp.Body.Close() 145 146 if resp.StatusCode != http.StatusOK { 147 return "", fmt.Errorf("GitHub API returned status %d", resp.StatusCode) 148 } 149 150 var commits []GitHubCommit 151 if err := json.NewDecoder(resp.Body).Decode(&commits); err != nil { 152 return "", fmt.Errorf("failed to decode response: %w", err) 153 } 154 155 if len(commits) == 0 { 156 return "", fmt.Errorf("no commits found for path %s", 
path) 157 } 158 159 return commits[0].SHA, nil 160} 161 162// downloadAndExtract downloads a GitHub archive and extracts files from a specific path 163func downloadAndExtract(ctx context.Context, repo, sha, extractPath, outputDir string, formatJSON bool, out io.Writer) error { 164 url := fmt.Sprintf("https://github.com/%s/archive/%s.tar.gz", repo, sha) 165 166 req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) 167 if err != nil { 168 return err 169 } 170 171 client := &http.Client{Timeout: 5 * time.Minute} 172 resp, err := client.Do(req) 173 if err != nil { 174 return err 175 } 176 defer resp.Body.Close() 177 178 if resp.StatusCode != http.StatusOK { 179 return fmt.Errorf("failed to download archive: status %d", resp.StatusCode) 180 } 181 182 gzr, err := gzip.NewReader(resp.Body) 183 if err != nil { 184 return fmt.Errorf("failed to create gzip reader: %w", err) 185 } 186 defer gzr.Close() 187 188 tr := tar.NewReader(gzr) 189 190 repoName := strings.Split(repo, "/")[1] 191 prefix := fmt.Sprintf("%s-%s/%s", repoName, sha, extractPath) 192 193 fmt.Fprintf(out, "Extracting files from %s\n", prefix) 194 195 fileCount := 0 196 for { 197 header, err := tr.Next() 198 if err == io.EOF { 199 break 200 } 201 if err != nil { 202 return fmt.Errorf("failed to read tar header: %w", err) 203 } 204 205 if header.Typeflag != tar.TypeReg { 206 continue 207 } 208 209 if !strings.HasPrefix(header.Name, prefix) { 210 continue 211 } 212 213 if !strings.HasSuffix(header.Name, ".json") { 214 continue 215 } 216 217 relativePath := strings.TrimPrefix(header.Name, prefix) 218 outputPath := filepath.Join(outputDir, relativePath) 219 220 if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil { 221 return fmt.Errorf("failed to create directory for %s: %w", outputPath, err) 222 } 223 224 data, err := io.ReadAll(tr) 225 if err != nil { 226 return fmt.Errorf("failed to read file %s: %w", header.Name, err) 227 } 228 229 if formatJSON { 230 var jsonData any 231 if err 
:= json.Unmarshal(data, &jsonData); err != nil { 232 return fmt.Errorf("failed to parse JSON in %s: %w", header.Name, err) 233 } 234 235 formattedData, err := json.MarshalIndent(jsonData, "", " ") 236 if err != nil { 237 return fmt.Errorf("failed to format JSON in %s: %w", header.Name, err) 238 } 239 data = append(formattedData, '\n') 240 } 241 242 if err := os.WriteFile(outputPath, data, 0o644); err != nil { 243 return fmt.Errorf("failed to write file %s: %w", outputPath, err) 244 } 245 246 fileCount++ 247 } 248 249 fmt.Fprintf(out, "Extracted %d files\n", fileCount) 250 return nil 251}