+2
-1
spindle/config/config.go
+2
-1
spindle/config/config.go
+2
-2
spindle/engine/ansi_stripper.go
+2
-2
spindle/engine/ansi_stripper.go
···
3
3
import (
4
4
"io"
5
5
6
-
"github.com/go-enry/go-enry/v2/regex"
6
+
"regexp"
7
7
)
8
8
9
9
// regex to match ANSI escape codes (e.g., color codes, cursor moves)
10
10
const ansi = "[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))"
11
11
12
-
var re = regex.MustCompile(ansi)
12
+
var re = regexp.MustCompile(ansi)
13
13
14
14
type ansiStrippingWriter struct {
15
15
underlying io.Writer
+52
-19
spindle/engine/engine.go
+52
-19
spindle/engine/engine.go
···
3
3
import (
4
4
"bufio"
5
5
"context"
6
+
"errors"
6
7
"fmt"
7
8
"io"
8
9
"log/slog"
9
10
"os"
10
11
"strings"
11
12
"sync"
13
+
"time"
12
14
13
15
"github.com/docker/docker/api/types/container"
14
16
"github.com/docker/docker/api/types/image"
···
176
178
// StartSteps starts all steps sequentially with the same base image.
177
179
// ONLY marks pipeline as failed if container's exit code is non-zero.
178
180
// All other errors are bubbled up.
181
+
// Fixed version of the step execution logic
179
182
func (e *Engine) StartSteps(ctx context.Context, steps []models.Step, wid models.WorkflowId, image string) error {
180
-
// set up logging channels
183
+
stepTimeoutStr := e.cfg.Pipelines.StepTimeout
184
+
stepTimeout, err := time.ParseDuration(stepTimeoutStr)
185
+
if err != nil {
186
+
e.l.Error("failed to parse step timeout", "error", err, "timeout", stepTimeoutStr)
187
+
stepTimeout = 5 * time.Minute
188
+
}
189
+
e.l.Info("using step timeout", "timeout", stepTimeout)
190
+
181
191
e.chanMu.Lock()
182
192
if _, exists := e.stdoutChans[wid.String()]; !exists {
183
193
e.stdoutChans[wid.String()] = make(chan string, 100)
···
216
226
return fmt.Errorf("connecting network: %w", err)
217
227
}
218
228
219
-
err = e.docker.ContainerStart(ctx, resp.ID, container.StartOptions{})
229
+
stepCtx, stepCancel := context.WithTimeout(ctx, stepTimeout)
230
+
231
+
err = e.docker.ContainerStart(stepCtx, resp.ID, container.StartOptions{})
220
232
if err != nil {
233
+
stepCancel()
221
234
return err
222
235
}
223
236
e.l.Info("started container", "name", resp.ID, "step", step.Name)
224
237
225
-
wg := sync.WaitGroup{}
238
+
// start tailing logs in background
239
+
tailDone := make(chan error, 1)
240
+
go func() {
241
+
tailDone <- e.TailStep(stepCtx, resp.ID, wid)
242
+
}()
226
243
227
-
wg.Add(1)
244
+
// wait for container completion or timeout
245
+
waitDone := make(chan struct{})
246
+
var state *container.State
247
+
var waitErr error
248
+
228
249
go func() {
229
-
defer wg.Done()
230
-
err := e.TailStep(ctx, resp.ID, wid)
231
-
if err != nil {
232
-
e.l.Error("failed to tail container", "container", resp.ID)
233
-
return
234
-
}
250
+
defer close(waitDone)
251
+
state, waitErr = e.WaitStep(stepCtx, resp.ID)
235
252
}()
236
253
237
-
// wait until all logs are piped
238
-
wg.Wait()
254
+
select {
255
+
case <-waitDone:
256
+
// container finished normally
257
+
stepCancel()
258
+
259
+
// wait for tailing to complete
260
+
<-tailDone
261
+
262
+
case <-stepCtx.Done():
263
+
e.l.Warn("step timed out; killing container", "container", resp.ID, "timeout", stepTimeout)
264
+
265
+
_ = e.DestroyStep(ctx, resp.ID)
266
+
267
+
// wait for both goroutines to finish
268
+
<-waitDone
269
+
<-tailDone
270
+
271
+
stepCancel()
272
+
return fmt.Errorf("step timed out after %v", stepTimeout)
273
+
}
239
274
240
-
state, err := e.WaitStep(ctx, resp.ID)
241
-
if err != nil {
242
-
return err
275
+
if waitErr != nil {
276
+
return waitErr
243
277
}
244
278
245
279
err = e.DestroyStep(ctx, resp.ID)
···
253
287
if err != nil {
254
288
return err
255
289
}
256
-
return fmt.Errorf("error: %s, exit code: %d, oom: %s", state.Error, state.ExitCode, state.OOMKilled)
290
+
return fmt.Errorf("error: %s, exit code: %d, oom: %t", state.Error, state.ExitCode, state.OOMKilled)
257
291
}
258
292
}
259
293
260
294
return nil
261
-
262
295
}
263
296
264
297
func (e *Engine) WaitStep(ctx context.Context, containerID string) (*container.State, error) {
···
318
351
defer wpipeOut.Close()
319
352
defer wpipeErr.Close()
320
353
_, err := stdcopy.StdCopy(wpipeOut, wpipeErr, tee)
321
-
if err != nil && err != io.EOF {
354
+
if err != nil && err != io.EOF && !errors.Is(context.DeadlineExceeded, err) {
322
355
e.l.Error("failed to copy logs", "error", err)
323
356
}
324
357
}()
···
393
426
394
427
for _, fn := range fns {
395
428
if err := fn(ctx); err != nil {
396
-
e.l.Error("failed to cleanup workflow resource", "workflowId", wid)
429
+
e.l.Error("failed to cleanup workflow resource", "workflowId", wid, "error", err)
397
430
}
398
431
}
399
432
return nil