cli + tui to publish to leaflet (wip) & manage tasks, notes & watch/read lists 🍃
charm
leaflet
readability
golang
1//go:build !prod
2
3package tools
4
5import (
6 "archive/tar"
7 "compress/gzip"
8 "context"
9 "encoding/json"
10 "fmt"
11 "io"
12 "net/http"
13 "os"
14 "path/filepath"
15 "strings"
16 "time"
17
18 "github.com/spf13/cobra"
19)
20
// GitHubCommit is the subset of a GitHub API commit object that this file
// decodes. Only the fields listed here are read from the JSON response.
type GitHubCommit struct {
	// SHA is the commit identifier, taken from the "sha" key.
	SHA string `json:"sha"`

	// Commit mirrors the nested "commit" object of the response.
	Commit struct {
		// Message is the commit message, taken from the "message" key.
		Message string `json:"message"`
	} `json:"commit"`
}
28
// ArchiveConfig carries the settings for one fetch-and-extract run. Its
// fields are bound to the gh-repo command's flags.
type ArchiveConfig struct {
	// Repo is the GitHub repository in owner/name form.
	Repo string

	// Path is the path within the repository to extract.
	Path string

	// Output is the directory that receives the extracted files.
	Output string

	// SHA is the commit to fetch. When empty, the latest commit for Path is
	// looked up before downloading.
	SHA string

	// FormatJSON requests that extracted JSON files be re-indented.
	FormatJSON bool
}
37
38// NewGHRepoCommand creates a command for fetching GitHub repository archives
39func NewGHRepoCommand() *cobra.Command {
40 var config ArchiveConfig
41
42 cmd := &cobra.Command{
43 Use: "gh-repo",
44 Short: "Fetch and extract files from a GitHub repository archive",
45 Long: `Fetches a GitHub repository archive (tarball), extracts specific paths,
46and optionally formats JSON files using Go's standard library.
47
48This is useful for syncing lexicons, schemas, or other data files from GitHub repositories.`,
49 Example: ` # Fetch lexicons from a specific path
50 noteleaf tools fetch gh-repo \
51 --repo hyperlink-academy/leaflet \
52 --path lexicons/pub/leaflet/ \
53 --output lexdocs/leaflet/
54
55 # Fetch from a specific commit
56 noteleaf tools fetch gh-repo \
57 --repo owner/repo \
58 --path schemas/ \
59 --output local/schemas/ \
60 --sha abc123def`,
61 RunE: func(cmd *cobra.Command, args []string) error {
62 if config.Repo == "" {
63 return fmt.Errorf("--repo is required")
64 }
65 if config.Path == "" {
66 return fmt.Errorf("--path is required")
67 }
68 if config.Output == "" {
69 return fmt.Errorf("--output is required")
70 }
71
72 ctx := cmd.Context()
73 if ctx == nil {
74 ctx = context.Background()
75 }
76
77 return fetchAndExtractArchive(ctx, config, cmd.OutOrStdout())
78 },
79 }
80
81 cmd.Flags().StringVar(&config.Repo, "repo", "", "GitHub repository (owner/name)")
82 cmd.Flags().StringVar(&config.Path, "path", "", "Path within repository to extract")
83 cmd.Flags().StringVar(&config.Output, "output", "", "Output directory for extracted files")
84 cmd.Flags().StringVar(&config.SHA, "sha", "", "Specific commit SHA (default: latest)")
85 cmd.Flags().BoolVar(&config.FormatJSON, "format-json", true, "Format JSON files with indentation")
86 return cmd
87}
88
89// fetchAndExtractArchive fetches a GitHub archive and extracts specific paths
90func fetchAndExtractArchive(ctx context.Context, config ArchiveConfig, out io.Writer) error {
91 sha := config.SHA
92 if sha == "" {
93 var err error
94 sha, err = getLatestCommit(ctx, config.Repo, config.Path)
95 if err != nil {
96 return fmt.Errorf("failed to get latest commit: %w", err)
97 }
98 fmt.Fprintf(out, "Latest commit: %s\n", sha)
99 }
100
101 tmpDir, err := os.MkdirTemp("", "repo-archive-*")
102 if err != nil {
103 return fmt.Errorf("failed to create temp directory: %w", err)
104 }
105 defer os.RemoveAll(tmpDir)
106
107 fmt.Fprintf(out, "Fetching archive for %s@%s\n", config.Repo, sha[:7])
108 if err := downloadAndExtract(ctx, config.Repo, sha, config.Path, tmpDir, config.FormatJSON, out); err != nil {
109 return fmt.Errorf("failed to download and extract: %w", err)
110 }
111
112 fmt.Fprintf(out, "Writing README with source information\n")
113 readme := fmt.Sprintf("Source: https://github.com/%s/tree/%s/%s\n", config.Repo, sha, config.Path)
114 if err := os.WriteFile(filepath.Join(tmpDir, "README.md"), []byte(readme), 0o644); err != nil {
115 return fmt.Errorf("failed to write README: %w", err)
116 }
117
118 fmt.Fprintf(out, "Moving extracted files to %s\n", config.Output)
119 if err := os.RemoveAll(config.Output); err != nil {
120 return fmt.Errorf("failed to remove existing output directory: %w", err)
121 }
122 if err := os.Rename(tmpDir, config.Output); err != nil {
123 return fmt.Errorf("failed to move files to output directory: %w", err)
124 }
125
126 fmt.Fprintf(out, "Successfully extracted archive to %s\n", config.Output)
127 return nil
128}
129
130// getLatestCommit fetches the latest commit SHA for a given repository and path
131func getLatestCommit(ctx context.Context, repo, path string) (string, error) {
132 url := fmt.Sprintf("https://api.github.com/repos/%s/commits?path=%s&per_page=1", repo, path)
133
134 req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
135 if err != nil {
136 return "", err
137 }
138
139 client := &http.Client{Timeout: 30 * time.Second}
140 resp, err := client.Do(req)
141 if err != nil {
142 return "", err
143 }
144 defer resp.Body.Close()
145
146 if resp.StatusCode != http.StatusOK {
147 return "", fmt.Errorf("GitHub API returned status %d", resp.StatusCode)
148 }
149
150 var commits []GitHubCommit
151 if err := json.NewDecoder(resp.Body).Decode(&commits); err != nil {
152 return "", fmt.Errorf("failed to decode response: %w", err)
153 }
154
155 if len(commits) == 0 {
156 return "", fmt.Errorf("no commits found for path %s", path)
157 }
158
159 return commits[0].SHA, nil
160}
161
// downloadAndExtract downloads the tar.gz archive of repo at sha from GitHub
// and writes the ".json" files found under extractPath into outputDir.
//
// repo must be in owner/name form; anything else is rejected before a request
// is made. extractPath is matched as a string prefix of each entry's path
// inside the repository, and the remainder of the path is preserved below
// outputDir. When formatJSON is true each file is parsed and rewritten with
// indentation, so a file that is not valid JSON aborts the extraction.
// Progress messages are written to out.
func downloadAndExtract(ctx context.Context, repo, sha, extractPath, outputDir string, formatJSON bool, out io.Writer) error {
	// Validate up front: indexing the result of a split would panic on a
	// repo without a slash.
	owner, repoName, ok := strings.Cut(repo, "/")
	if !ok || owner == "" || repoName == "" || strings.Contains(repoName, "/") {
		return fmt.Errorf("invalid repository %q: expected owner/name", repo)
	}

	archiveURL := fmt.Sprintf("https://github.com/%s/archive/%s.tar.gz", repo, sha)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, archiveURL, nil)
	if err != nil {
		return err
	}

	client := &http.Client{Timeout: 5 * time.Minute}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("failed to download archive: status %d", resp.StatusCode)
	}

	gzr, err := gzip.NewReader(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to create gzip reader: %w", err)
	}
	defer gzr.Close()

	prefix := fmt.Sprintf("%s-%s/%s", repoName, sha, extractPath)
	fmt.Fprintf(out, "Extracting files from %s\n", prefix)

	fileCount, err := extractJSONEntries(tar.NewReader(gzr), extractPath, outputDir, formatJSON)
	if err != nil {
		return err
	}

	fmt.Fprintf(out, "Extracted %d files\n", fileCount)
	return nil
}

// extractJSONEntries writes every regular ".json" entry of tr whose path,
// after the archive's top-level directory, starts with extractPath into
// outputDir, and returns how many files were written.
func extractJSONEntries(tr *tar.Reader, extractPath, outputDir string, formatJSON bool) (int, error) {
	root := filepath.Clean(outputDir)
	written := 0

	for {
		header, err := tr.Next()
		if err == io.EOF {
			return written, nil
		}
		if err != nil {
			return 0, fmt.Errorf("failed to read tar header: %w", err)
		}

		if header.Typeflag != tar.TypeReg || !strings.HasSuffix(header.Name, ".json") {
			continue
		}

		// Drop the archive's single top-level directory rather than assuming
		// it is named "<repo>-<sha>", so matching still works when sha is
		// abbreviated or is not spelled the way the directory name is.
		_, inRepo, found := strings.Cut(header.Name, "/")
		if !found || !strings.HasPrefix(inRepo, extractPath) {
			continue
		}

		relativePath := strings.TrimPrefix(inRepo, extractPath)
		outputPath := filepath.Join(root, relativePath)

		// Entry names come from the network; refuse any that would resolve
		// outside the output directory (for example via "../").
		rel, err := filepath.Rel(root, outputPath)
		if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return 0, fmt.Errorf("archive entry %s escapes output directory", header.Name)
		}

		if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
			return 0, fmt.Errorf("failed to create directory for %s: %w", outputPath, err)
		}

		data, err := io.ReadAll(tr)
		if err != nil {
			return 0, fmt.Errorf("failed to read file %s: %w", header.Name, err)
		}

		if formatJSON {
			var jsonData any
			if err := json.Unmarshal(data, &jsonData); err != nil {
				return 0, fmt.Errorf("failed to parse JSON in %s: %w", header.Name, err)
			}

			formattedData, err := json.MarshalIndent(jsonData, "", " ")
			if err != nil {
				return 0, fmt.Errorf("failed to format JSON in %s: %w", header.Name, err)
			}
			data = append(formattedData, '\n')
		}

		if err := os.WriteFile(outputPath, data, 0o644); err != nil {
			return 0, fmt.Errorf("failed to write file %s: %w", outputPath, err)
		}

		written++
	}
}