Mirror of @tangled.org/core. Running on a Raspberry Pi Zero 2 (Please be gentle).

spindle: rework logging

Docker always writes to disk; the logs are then streamed on demand via the
Logs endpoint.

Signed-off-by: oppiliappan <me@oppi.li>

oppi.li 9c251ff8 944d0f1a

verified
+101 -272
+8 -117
spindle/engine/engine.go
··· 1 1 package engine 2 2 3 3 import ( 4 - "bufio" 5 4 "context" 6 5 "errors" 7 6 "fmt" ··· 38 39 n *notifier.Notifier 39 40 cfg *config.Config 40 41 41 - chanMu sync.RWMutex 42 - stdoutChans map[string]chan string 43 - stderrChans map[string]chan string 44 - 45 42 cleanupMu sync.Mutex 46 43 cleanup map[string][]cleanupFunc 47 44 } ··· 57 62 n: n, 58 63 cfg: cfg, 59 64 } 60 - 61 - e.stdoutChans = make(map[string]chan string, 100) 62 - e.stderrChans = make(map[string]chan string, 100) 63 65 64 66 e.cleanup = make(map[string][]cleanupFunc) 65 67 ··· 180 188 } 181 189 e.l.Info("using step timeout", "timeout", stepTimeout) 182 190 183 - e.chanMu.Lock() 184 - if _, exists := e.stdoutChans[wid.String()]; !exists { 185 - e.stdoutChans[wid.String()] = make(chan string, 100) 186 - } 187 - if _, exists := e.stderrChans[wid.String()]; !exists { 188 - e.stderrChans[wid.String()] = make(chan string, 100) 189 - } 190 - e.chanMu.Unlock() 191 - 192 - // close channels after all steps are complete 193 - defer func() { 194 - close(e.stdoutChans[wid.String()]) 195 - close(e.stderrChans[wid.String()]) 196 - }() 197 - 198 191 for stepIdx, step := range steps { 199 192 envs := ConstructEnvs(step.Environment) 200 193 envs.AddEnv("HOME", workspaceDir) ··· 259 282 260 283 if state.ExitCode != 0 { 261 284 e.l.Error("workflow failed!", "workflow_id", wid.String(), "error", state.Error, "exit_code", state.ExitCode, "oom_killed", state.OOMKilled) 262 - err := e.db.StatusFailed(wid, state.Error, int64(state.ExitCode), e.n) 263 - if err != nil { 264 - return err 265 - } 266 285 return fmt.Errorf("error: %s, exit code: %d, oom: %t", state.Error, state.ExitCode, state.OOMKilled) 267 286 } 268 287 } ··· 291 318 Follow: true, 292 319 ShowStdout: true, 293 320 ShowStderr: true, 294 - Details: true, 321 + Details: false, 295 322 Timestamps: false, 296 323 }) 297 324 if err != nil { 298 325 return err 299 326 } 300 327 301 - stepLogger, err := NewStepLogger(e.cfg.Pipelines.LogDir, wid.String(), 
stepIdx) 328 + wfLogger, err := NewWorkflowLogger(e.cfg.Pipelines.LogDir, wid) 302 329 if err != nil { 303 330 e.l.Warn("failed to setup step logger; logs will not be persisted", "error", err) 331 + return err 304 332 } 333 + defer wfLogger.Close() 305 334 306 - var logOutput io.Writer = io.Discard 307 - 308 - if e.cfg.Server.Dev { 309 - logOutput = &ansiStrippingWriter{underlying: os.Stdout} 335 + _, err = stdcopy.StdCopy(wfLogger.Stdout(), wfLogger.Stderr(), logs) 336 + if err != nil && err != io.EOF && !errors.Is(err, context.DeadlineExceeded) { 337 + return fmt.Errorf("failed to copy logs: %w", err) 310 338 } 311 - 312 - tee := io.TeeReader(logs, logOutput) 313 - 314 - // using StdCopy we demux logs and stream stdout and stderr to different 315 - // channels. 316 - // 317 - // stdout w||r stdoutCh 318 - // stderr w||r stderrCh 319 - // 320 - 321 - rpipeOut, wpipeOut := io.Pipe() 322 - rpipeErr, wpipeErr := io.Pipe() 323 - 324 - // sets up a io.MultiWriter to write to both the pipe 325 - // and the file-based logger. 326 - multiOut := io.MultiWriter(wpipeOut, stepLogger.Stdout()) 327 - multiErr := io.MultiWriter(wpipeErr, stepLogger.Stderr()) 328 - 329 - wg := sync.WaitGroup{} 330 - 331 - wg.Add(1) 332 - go func() { 333 - defer wg.Done() 334 - defer wpipeOut.Close() 335 - defer wpipeErr.Close() 336 - defer stepLogger.Close() 337 - _, err := stdcopy.StdCopy(multiOut, multiErr, tee) 338 - if err != nil && err != io.EOF && !errors.Is(context.DeadlineExceeded, err) { 339 - e.l.Error("failed to copy logs", "error", err) 340 - } 341 - }() 342 - 343 - // read from stdout and send to stdout pipe 344 - // NOTE: the stdoutCh channnel is closed further up in StartSteps 345 - // once all steps are done. 
346 - wg.Add(1) 347 - go func() { 348 - defer wg.Done() 349 - e.chanMu.RLock() 350 - stdoutCh := e.stdoutChans[wid.String()] 351 - e.chanMu.RUnlock() 352 - 353 - scanner := bufio.NewScanner(rpipeOut) 354 - for scanner.Scan() { 355 - stdoutCh <- scanner.Text() 356 - } 357 - if err := scanner.Err(); err != nil { 358 - e.l.Error("failed to scan stdout", "error", err) 359 - } 360 - }() 361 - 362 - // read from stderr and send to stderr pipe 363 - // NOTE: the stderrCh channnel is closed further up in StartSteps 364 - // once all steps are done. 365 - wg.Add(1) 366 - go func() { 367 - defer wg.Done() 368 - e.chanMu.RLock() 369 - stderrCh := e.stderrChans[wid.String()] 370 - e.chanMu.RUnlock() 371 - 372 - scanner := bufio.NewScanner(rpipeErr) 373 - for scanner.Scan() { 374 - stderrCh <- scanner.Text() 375 - } 376 - if err := scanner.Err(); err != nil { 377 - e.l.Error("failed to scan stderr", "error", err) 378 - } 379 - }() 380 - 381 - wg.Wait() 382 339 383 340 return nil 384 341 } ··· 344 441 } 345 442 } 346 443 return nil 347 - } 348 - 349 - func (e *Engine) LogChannels(wid models.WorkflowId) (stdout <-chan string, stderr <-chan string, ok bool) { 350 - e.chanMu.RLock() 351 - defer e.chanMu.RUnlock() 352 - 353 - stdoutCh, ok1 := e.stdoutChans[wid.String()] 354 - stderrCh, ok2 := e.stderrChans[wid.String()] 355 - 356 - if !ok1 || !ok2 { 357 - return nil, nil, false 358 - } 359 - return stdoutCh, stderrCh, true 360 444 } 361 445 362 446 func (e *Engine) registerCleanup(wid models.WorkflowId, fn cleanupFunc) { ··· 397 507 CapDrop: []string{"ALL"}, 398 508 CapAdd: []string{"CAP_DAC_OVERRIDE"}, 399 509 SecurityOpt: []string{"no-new-privileges"}, 510 + ExtraHosts: []string{"host.docker.internal:host-gateway"}, 400 511 } 401 512 402 513 return hostConfig
+54 -38
spindle/engine/logger.go
··· 1 1 package engine 2 2 3 3 import ( 4 + "encoding/json" 4 5 "fmt" 5 6 "io" 6 7 "os" 7 8 "path/filepath" 9 + "strings" 10 + 11 + "tangled.sh/tangled.sh/core/spindle/models" 8 12 ) 9 13 10 - type StepLogger struct { 11 - stderr *os.File 12 - stdout *os.File 14 + type WorkflowLogger struct { 15 + file *os.File 16 + encoder *json.Encoder 13 17 } 14 18 15 - func NewStepLogger(baseDir, workflowID string, stepIdx int) (*StepLogger, error) { 16 - dir := filepath.Join(baseDir, workflowID) 19 + func NewWorkflowLogger(baseDir string, wid models.WorkflowId) (*WorkflowLogger, error) { 20 + dir := filepath.Join(baseDir, wid.String()) 17 21 if err := os.MkdirAll(dir, 0755); err != nil { 18 22 return nil, fmt.Errorf("creating log dir: %w", err) 19 23 } 20 24 21 - stdoutPath := logFilePath(baseDir, workflowID, "stdout", stepIdx) 22 - stderrPath := logFilePath(baseDir, workflowID, "stderr", stepIdx) 25 + path := LogFilePath(baseDir, wid) 23 26 24 - stdoutFile, err := os.Create(stdoutPath) 27 + file, err := os.Create(path) 25 28 if err != nil { 26 - return nil, fmt.Errorf("creating stdout log file: %w", err) 29 + return nil, fmt.Errorf("creating log file: %w", err) 27 30 } 28 31 29 - stderrFile, err := os.Create(stderrPath) 30 - if err != nil { 31 - stdoutFile.Close() 32 - return nil, fmt.Errorf("creating stderr log file: %w", err) 33 - } 34 - 35 - return &StepLogger{ 36 - stdout: stdoutFile, 37 - stderr: stderrFile, 32 + return &WorkflowLogger{ 33 + file: file, 34 + encoder: json.NewEncoder(file), 38 35 }, nil 39 36 } 40 37 41 - func (l *StepLogger) Stdout() io.Writer { 42 - return l.stdout 38 + func (l *WorkflowLogger) Write(p []byte) (n int, err error) { 39 + return l.file.Write(p) 43 40 } 44 41 45 - func (l *StepLogger) Stderr() io.Writer { 46 - return l.stderr 42 + func (l *WorkflowLogger) Close() error { 43 + return l.file.Close() 47 44 } 48 45 49 - func (l *StepLogger) Close() error { 50 - err1 := l.stdout.Close() 51 - err2 := l.stderr.Close() 52 - if err1 != nil { 53 - 
return err1 54 - } 55 - return err2 56 - } 46 + func OpenLogFile(baseDir string, workflowID models.WorkflowId) (*os.File, error) { 47 + logPath := LogFilePath(baseDir, workflowID) 57 48 58 - func ReadStepLog(baseDir, workflowID, stream string, stepIdx int) (string, error) { 59 - logPath := logFilePath(baseDir, workflowID, stream, stepIdx) 60 - 61 - data, err := os.ReadFile(logPath) 49 + file, err := os.Open(logPath) 62 50 if err != nil { 63 - return "", fmt.Errorf("error reading log file: %w", err) 51 + return nil, fmt.Errorf("error opening log file: %w", err) 64 52 } 65 53 66 - return string(data), nil 54 + return file, nil 67 55 } 68 56 69 - func logFilePath(baseDir, workflowID, stream string, stepIdx int) string { 70 - logFilePath := filepath.Join(baseDir, workflowID, fmt.Sprintf("%d-%s.log", stepIdx, stream)) 57 + func LogFilePath(baseDir string, workflowID models.WorkflowId) string { 58 + logFilePath := filepath.Join(baseDir, fmt.Sprintf("%s.log", workflowID.String())) 71 59 return logFilePath 60 + } 61 + 62 + func (l *WorkflowLogger) Stdout() io.Writer { 63 + return &jsonWriter{logger: l, stream: "stdout"} 64 + } 65 + 66 + func (l *WorkflowLogger) Stderr() io.Writer { 67 + return &jsonWriter{logger: l, stream: "stderr"} 68 + } 69 + 70 + type jsonWriter struct { 71 + logger *WorkflowLogger 72 + stream string 73 + } 74 + 75 + func (w *jsonWriter) Write(p []byte) (int, error) { 76 + line := strings.TrimRight(string(p), "\r\n") 77 + 78 + entry := models.LogLine{ 79 + Stream: w.stream, 80 + Data: line, 81 + } 82 + 83 + if err := w.logger.encoder.Encode(entry); err != nil { 84 + return 0, err 85 + } 86 + 87 + return len(p), nil 72 88 }
+5
spindle/models/models.go
··· 70 70 func (s StatusKind) IsFinish() bool { 71 71 return slices.Contains(FinishStates[:], s) 72 72 } 73 + 74 + type LogLine struct { 75 + Stream string `json:"s"` 76 + Data string `json:"d"` 77 + }
+5
spindle/models/pipeline.go
··· 53 53 swf.Image = workflowImage(twf.Dependencies, cfg.Pipelines.Nixery) 54 54 55 55 swf.addNixProfileToPath() 56 + swf.enableNixFlakes() 56 57 setup := &setupSteps{} 57 58 58 59 setup.addStep(nixConfStep()) ··· 101 100 102 101 func (wf *Workflow) addNixProfileToPath() { 103 102 wf.Environment["PATH"] = "$PATH:/.nix-profile/bin" 103 + } 104 + 105 + func (wf *Workflow) enableNixFlakes() { 106 + wf.Environment["NIX_CONFIG"] = "experimental-features = nix-command flakes" 104 107 }
-1
spindle/server.go
··· 148 148 w.Write([]byte(s.cfg.Server.Owner)) 149 149 }) 150 150 mux.HandleFunc("/logs/{knot}/{rkey}/{name}", s.Logs) 151 - mux.HandleFunc("/logs/{knot}/{rkey}/{name}/{idx}", s.StepLogs) 152 151 return mux 153 152 } 154 153
+29 -116
spindle/stream.go
··· 1 1 package spindle 2 2 3 3 import ( 4 - "bufio" 5 4 "context" 6 5 "encoding/json" 7 6 "fmt" 8 7 "net/http" 9 8 "strconv" 10 - "strings" 11 9 "time" 12 10 13 11 "tangled.sh/tangled.sh/core/spindle/engine" ··· 13 15 14 16 "github.com/go-chi/chi/v5" 15 17 "github.com/gorilla/websocket" 18 + "github.com/hpcloud/tail" 16 19 ) 17 20 18 21 var upgrader = websocket.Upgrader{ ··· 96 97 return 97 98 } 98 99 99 - s.handleLogStream(w, r, func(ctx context.Context, conn *websocket.Conn) error { 100 - return s.streamLogs(ctx, conn, wid) 101 - }) 102 - } 103 - 104 - func (s *Spindle) StepLogs(w http.ResponseWriter, r *http.Request) { 105 - wid, err := getWorkflowID(r) 106 - if err != nil { 107 - http.Error(w, err.Error(), http.StatusBadRequest) 108 - return 109 - } 110 - 111 - idxStr := chi.URLParam(r, "idx") 112 - if idxStr == "" { 113 - http.Error(w, "step index required", http.StatusBadRequest) 114 - return 115 - } 116 - idx, err := strconv.Atoi(idxStr) 117 - if err != nil { 118 - http.Error(w, "bad step index", http.StatusBadRequest) 119 - return 120 - } 121 - 122 - s.handleLogStream(w, r, func(ctx context.Context, conn *websocket.Conn) error { 123 - return s.streamLogFromDisk(ctx, conn, wid, idx) 124 - }) 125 - } 126 - 127 - func (s *Spindle) handleLogStream(w http.ResponseWriter, r *http.Request, streamFn func(ctx context.Context, conn *websocket.Conn) error) { 128 100 l := s.l.With("handler", "Logs") 101 + l = s.l.With("wid", wid) 129 102 130 103 conn, err := upgrader.Upgrade(w, r, nil) 131 104 if err != nil { ··· 121 150 } 122 151 }() 123 152 124 - if err := streamFn(ctx, conn); err != nil { 153 + if err := s.streamLogsFromDisk(ctx, conn, wid); err != nil { 125 154 l.Error("log stream failed", "err", err) 126 155 } 127 156 l.Debug("logs connection closed") 128 157 } 129 158 130 - func (s *Spindle) streamLogs(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId) error { 131 - l := s.l.With("workflow_id", wid.String()) 159 + func (s *Spindle) 
streamLogsFromDisk(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId) error { 160 + filePath := engine.LogFilePath(s.cfg.Pipelines.LogDir, wid) 132 161 133 - stdoutCh, stderrCh, ok := s.eng.LogChannels(wid) 134 - if !ok { 135 - return fmt.Errorf("workflow_id %q not found", wid.String()) 162 + config := tail.Config{ 163 + Follow: true, 164 + ReOpen: true, 165 + MustExist: false, 166 + Location: &tail.SeekInfo{Offset: 0, Whence: 0}, 167 + Logger: tail.DiscardingLogger, 136 168 } 137 169 138 - done := make(chan struct{}) 139 - 140 - go func() { 141 - for { 142 - select { 143 - case line, ok := <-stdoutCh: 144 - if !ok { 145 - done <- struct{}{} 146 - return 147 - } 148 - msg := map[string]string{"type": "stdout", "data": line} 149 - if err := conn.WriteJSON(msg); err != nil { 150 - l.Error("write stdout failed", "err", err) 151 - done <- struct{}{} 152 - return 153 - } 154 - case <-ctx.Done(): 155 - done <- struct{}{} 156 - return 157 - } 158 - } 159 - }() 160 - 161 - go func() { 162 - for { 163 - select { 164 - case line, ok := <-stderrCh: 165 - if !ok { 166 - done <- struct{}{} 167 - return 168 - } 169 - msg := map[string]string{"type": "stderr", "data": line} 170 - if err := conn.WriteJSON(msg); err != nil { 171 - l.Error("write stderr failed", "err", err) 172 - done <- struct{}{} 173 - return 174 - } 175 - case <-ctx.Done(): 176 - done <- struct{}{} 177 - return 178 - } 179 - } 180 - }() 181 - 182 - select { 183 - case <-done: 184 - case <-ctx.Done(): 170 + t, err := tail.TailFile(filePath, config) 171 + if err != nil { 172 + return fmt.Errorf("failed to tail log file: %w", err) 185 173 } 174 + defer t.Stop() 186 175 187 - return nil 188 - } 189 - 190 - func (s *Spindle) streamLogFromDisk(ctx context.Context, conn *websocket.Conn, wid models.WorkflowId, stepIdx int) error { 191 - streams := []string{"stdout", "stderr"} 192 - 193 - for _, stream := range streams { 194 - data, err := engine.ReadStepLog(s.cfg.Pipelines.LogDir, wid.String(), stream, 
stepIdx) 195 - if err != nil { 196 - // log but continue to next stream 197 - s.l.Error("failed to read step log", "stream", stream, "step", stepIdx, "wid", wid.String(), "err", err) 198 - continue 199 - } 200 - 201 - scanner := bufio.NewScanner(strings.NewReader(data)) 202 - for scanner.Scan() { 203 - select { 204 - case <-ctx.Done(): 205 - return ctx.Err() 206 - default: 207 - msg := map[string]string{ 208 - "type": stream, 209 - "data": scanner.Text(), 210 - } 211 - if err := conn.WriteJSON(msg); err != nil { 212 - return err 213 - } 176 + for { 177 + select { 178 + case <-ctx.Done(): 179 + return ctx.Err() 180 + case line := <-t.Lines: 181 + if line == nil { 182 + return fmt.Errorf("tail channel closed unexpectedly") 214 183 } 215 - } 216 184 217 - if err := scanner.Err(); err != nil { 218 - return fmt.Errorf("error scanning %s log: %w", stream, err) 185 + if line.Err != nil { 186 + return fmt.Errorf("error tailing log file: %w", line.Err) 187 + } 188 + 189 + if err := conn.WriteMessage(websocket.TextMessage, []byte(line.Text)); err != nil { 190 + return fmt.Errorf("failed to write to websocket: %w", err) 191 + } 219 192 } 220 193 } 221 - 222 - return nil 223 194 } 224 195 225 196 func (s *Spindle) streamPipelines(conn *websocket.Conn, cursor *int64) error {