[mirror] Scalable static site server for Git forges (like GitHub Pages)

Implement `-audit-server`.

To use this feature, configure git-pages with, e.g.:

[audit]
collect = true
notify-url = "http://localhost:3004/"

and run an audit server with e.g.:

git-pages -audit-server tcp/:3004 python $(pwd)/process.py

The provided command line is executed with two arguments appended
(the audit record ID and the event type), and it runs in a temporary
directory into which the audit record has been extracted. The following
files will be present in this directory (`$1` stands for the audit record
ID, i.e. the first appended argument):
* `$1-event.json` (always)
* `$1-manifest.json` (if type is `CommitManifest`)
* `$1-archive.tar` (if type is `CommitManifest`)

The command must exit successfully for event processing to be considered
finished. Until it does, the worker keeps re-sending the notification
with exponential backoff.

Changed files
+148 -18
src
+1
.gitignore
··· 4 4 /data 5 5 /config*.toml* 6 6 /git-pages 7 + /site
+127 -7
src/audit.go
··· 4 4 "cmp" 5 5 "context" 6 6 "fmt" 7 + "io" 7 8 "net/http" 9 + "os" 10 + "os/exec" 11 + "path/filepath" 8 12 "strconv" 9 13 "strings" 10 14 "time" ··· 147 151 return json 148 152 } 149 153 154 + // This function receives `id` and `record` separately because the record itself may have its 155 + // ID missing or mismatched. While this is very unlikely, using the actual primary key as 156 + // the filename is more robust. 157 + func ExtractAuditRecord(ctx context.Context, id AuditID, record *AuditRecord, dest string) error { 158 + const mode = 0o400 // readable by current user, not writable 159 + 160 + err := os.WriteFile(filepath.Join(dest, fmt.Sprintf("%s-event.json", id)), 161 + AuditRecordJSON(record, AuditRecordNoManifest), mode) 162 + if err != nil { 163 + return err 164 + } 165 + 166 + if record.Manifest != nil { 167 + err = os.WriteFile(filepath.Join(dest, fmt.Sprintf("%s-manifest.json", id)), 168 + ManifestJSON(record.Manifest), mode) 169 + if err != nil { 170 + return err 171 + } 172 + 173 + archive, err := os.OpenFile(filepath.Join(dest, fmt.Sprintf("%s-archive.tar", id)), 174 + os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode) 175 + if err != nil { 176 + return err 177 + } 178 + defer archive.Close() 179 + 180 + err = CollectTar(ctx, archive, record.Manifest, ManifestMetadata{}) 181 + if err != nil { 182 + return err 183 + } 184 + } 185 + 186 + return nil 187 + } 188 + 189 + func AuditEventProcessor(command string, args []string) (http.Handler, error) { 190 + var err error 191 + 192 + // Resolve the command to an absolute path, as it will be run from a different current 193 + // directory, which would break e.g. `git-pages -audit-server tcp/:3004 ./handler.sh`. 
194 + if command, err = exec.LookPath(command); err != nil { 195 + return nil, err 196 + } 197 + if command, err = filepath.Abs(command); err != nil { 198 + return nil, err 199 + } 200 + 201 + router := http.NewServeMux() 202 + router.Handle("GET /", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 203 + // Go will cancel the request context if the client drops the connection. We don't want 204 + // that to interrupt processing. However, we also want the client (not the server) to 205 + // handle retries, so instead of spawning a goroutine to process the event, we do this 206 + // within the HTTP handler. If an error is returned, the notify goroutine in the worker 207 + // will retry the HTTP request (with backoff) until it succeeds. 208 + // 209 + // This is a somewhat idiosyncratic design and it's not clear that this is the best 210 + // possible approach (e.g. if the worker gets restarted and the event processing fails, 211 + // it will not be retried), but it should do the job for now. It is expected that 212 + // some form of observability is used to highlight event processor errors. 213 + ctx := context.WithoutCancel(r.Context()) 214 + 215 + id, err := ParseAuditID(r.URL.RawQuery) 216 + if err != nil { 217 + logc.Printf(ctx, "audit process err: malformed query\n") 218 + http.Error(w, "malformed query", http.StatusBadRequest) 219 + return 220 + } 221 + 222 + record, err := backend.QueryAuditLog(ctx, id) 223 + if err != nil { 224 + logc.Printf(ctx, "audit process err: missing record\n") 225 + http.Error(w, "missing record", http.StatusNotFound) 226 + return 227 + } 228 + 229 + args := append(args, id.String(), record.GetEvent().String()) 230 + cmd := exec.CommandContext(ctx, command, args...) 
231 + if cmd.Dir, err = os.MkdirTemp("", "auditRecord"); err != nil { 232 + panic(fmt.Errorf("mkdtemp: %w", err)) 233 + } 234 + defer os.RemoveAll(cmd.Dir) 235 + 236 + if err = ExtractAuditRecord(ctx, id, record, cmd.Dir); err != nil { 237 + logc.Printf(ctx, "audit process %s err: %s\n", id, err) 238 + http.Error(w, err.Error(), http.StatusInternalServerError) 239 + return 240 + } 241 + 242 + output, err := cmd.CombinedOutput() 243 + if err != nil { 244 + logc.Printf(ctx, "audit process %s err: %s; %s\n", id, err, string(output)) 245 + w.WriteHeader(http.StatusServiceUnavailable) 246 + if len(output) == 0 { 247 + fmt.Fprintln(w, err.Error()) 248 + } 249 + } else { 250 + logc.Printf(ctx, "audit process %s ok: %s\n", id, string(output)) 251 + w.WriteHeader(http.StatusOK) 252 + } 253 + w.Write(output) 254 + })) 255 + return router, nil 256 + } 257 + 150 258 type auditedBackend struct { 151 259 Backend 152 260 } ··· 199 307 notifyURL := config.Audit.NotifyURL.URL 200 308 notifyURL.RawQuery = id.String() 201 309 310 + // See also the explanation in `AuditEventProcessor` above. 
202 311 go func() { 203 312 backoff := exponential.Backoff{ 204 313 Jitter: true, ··· 206 315 Max: time.Second * 60, 207 316 } 208 317 for { 209 - _, err := http.Get(notifyURL.String()) 210 - if err != nil { 318 + resp, err := http.Get(notifyURL.String()) 319 + var body []byte 320 + if err == nil { 321 + defer resp.Body.Close() 322 + body, _ = io.ReadAll(resp.Body) 323 + } 324 + if err == nil && resp.StatusCode == http.StatusOK { 325 + logc.Printf(ctx, "audit notify %s ok: %s\n", id, string(body)) 326 + auditNotifyOkCount.Inc() 327 + break 328 + } else { 211 329 sleepFor := backoff.Duration() 212 - logc.Printf(ctx, "audit notify %s err: %s (retry in %s)", id, err, sleepFor) 330 + if err != nil { 331 + logc.Printf(ctx, "audit notify %s err: %s (retry in %s)", 332 + id, err, sleepFor) 333 + } else { 334 + logc.Printf(ctx, "audit notify %s fail: %s (retry in %s); %s", 335 + id, resp.Status, sleepFor, string(body)) 336 + } 213 337 auditNotifyErrorCount.Inc() 214 338 time.Sleep(sleepFor) 215 - } else { 216 - logc.Printf(ctx, "audit notify %s ok", id) 217 - auditNotifyOkCount.Inc() 218 - break 219 339 } 220 340 } 221 341 }()
+1 -2
src/fetch.go
··· 41 41 var storer *filesystem.Storage 42 42 for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} { 43 43 var tempDir string 44 - tempDir, err = os.MkdirTemp("", "fetchRepo") 45 - if err != nil { 44 + if tempDir, err = os.MkdirTemp("", "fetchRepo"); err != nil { 46 45 return nil, fmt.Errorf("mkdtemp: %w", err) 47 46 } 48 47 defer os.RemoveAll(tempDir)
+19 -9
src/main.go
··· 177 177 fmt.Fprintf(os.Stderr, "(admin) "+ 178 178 "git-pages {-run-migration <name>|-freeze-domain <domain>|-unfreeze-domain <domain>}\n") 179 179 fmt.Fprintf(os.Stderr, "(audit) "+ 180 - "git-pages {-audit-log|-audit-read <id>}\n") 180 + "git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n") 181 181 fmt.Fprintf(os.Stderr, "(info) "+ 182 182 "git-pages {-print-config-env-vars|-print-config}\n") 183 183 fmt.Fprintf(os.Stderr, "(cli) "+ ··· 215 215 "display audit log") 216 216 auditRead := flag.String("audit-read", "", 217 217 "extract contents of audit record `id` to files '<id>-*'") 218 + auditServer := flag.String("audit-server", "", 219 + "listen for notifications on `endpoint` and spawn a process for each audit event") 218 220 flag.Parse() 219 221 220 222 var cliOperations int ··· 228 230 *unfreezeDomain != "", 229 231 *auditLog, 230 232 *auditRead != "", 233 + *auditServer != "", 231 234 } { 232 235 if selected { 233 236 cliOperations++ ··· 469 472 logc.Fatalln(ctx, err) 470 473 } 471 474 472 - errEvent := os.WriteFile(fmt.Sprintf("%s-event.json", id), 473 - AuditRecordJSON(record, AuditRecordNoManifest), 0o400) 474 - errManifest := os.WriteFile(fmt.Sprintf("%s-manifest.json", id), 475 - ManifestJSON(record.Manifest), 0o400) 476 - fileArchive, errArchive := os.OpenFile(fmt.Sprintf("%s-archive.tar", id), 477 - os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o400) 478 - if err = errors.Join(errEvent, errManifest, errArchive); err != nil { 475 + if err = ExtractAuditRecord(ctx, id, record, "."); err != nil { 476 + logc.Fatalln(ctx, err) 477 + } 478 + 479 + case *auditServer != "": 480 + if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 479 481 logc.Fatalln(ctx, err) 480 482 } 481 - if err = CollectTar(ctx, fileArchive, record.Manifest, ManifestMetadata{}); err != nil { 483 + 484 + if flag.NArg() < 1 { 485 + logc.Fatalln(ctx, "handler path not provided") 486 + } 487 + 488 + processor, err := 
AuditEventProcessor(flag.Arg(0), flag.Args()[1:]) 489 + if err != nil { 482 490 logc.Fatalln(ctx, err) 483 491 } 492 + 493 + serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor)) 484 494 485 495 default: 486 496 // Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration