+6
-6
nix/vm.nix
+6
-6
nix/vm.nix
···
43
43
guest.port = 6000;
44
44
}
45
45
# spindle
46
-
{
47
-
from = "host";
48
-
host.port = 6555;
49
-
guest.port = 6555;
50
-
}
46
+
# {
47
+
# from = "host";
48
+
# host.port = 6555;
49
+
# guest.port = 6555;
50
+
# }
51
51
];
52
52
sharedDirectories = {
53
53
# We can't use the 9p mounts directly for most of these
···
83
83
};
84
84
};
85
85
services.tangled-spindle = {
86
-
enable = true;
86
+
enable = false;
87
87
server = {
88
88
owner = envVar "TANGLED_VM_SPINDLE_OWNER";
89
89
hostname = "localhost:6555";
+90
-26
spindle/engines/nixery/engine.go
+90
-26
spindle/engines/nixery/engine.go
···
173
173
func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) error {
174
174
e.l.Info("setting up workflow", "workflow", wid)
175
175
176
-
_, err := e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
177
-
Driver: "bridge",
178
-
})
179
-
if err != nil {
180
-
return err
181
-
}
182
-
e.registerCleanup(wid, func(ctx context.Context) error {
183
-
return e.docker.NetworkRemove(ctx, networkName(wid))
184
-
})
185
-
186
176
addl := wf.Data.(addlFields)
187
177
188
178
reader, err := e.docker.ImagePull(ctx, addl.image, image.PullOptions{})
···
193
183
}
194
184
defer reader.Close()
195
185
io.Copy(os.Stdout, reader)
186
+
187
+
_, err = e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
188
+
Driver: "bridge",
189
+
})
190
+
if err != nil {
191
+
return err
192
+
}
193
+
e.registerCleanup(wid, func(ctx context.Context) error {
194
+
return e.docker.NetworkRemove(ctx, networkName(wid))
195
+
})
196
196
197
197
resp, err := e.docker.ContainerCreate(ctx, &container.Config{
198
198
Image: addl.image,
···
294
294
for _, s := range secrets {
295
295
workflowEnvs.AddEnv(s.Key, s.Value)
296
296
}
297
-
298
297
step := w.Steps[idx].(Step)
299
-
300
298
select {
301
299
case <-ctx.Done():
302
300
return ctx.Err()
303
301
default:
304
302
}
305
-
306
303
envs := append(EnvVars(nil), workflowEnvs...)
307
304
for k, v := range step.environment {
308
305
envs.AddEnv(k, v)
309
306
}
310
307
envs.AddEnv("HOME", homeDir)
308
+
309
+
e.l.Info("executing step",
310
+
"workflow_id", wid.String(),
311
+
"step_index", idx,
312
+
"step_name", step.Name,
313
+
"command", step.command,
314
+
)
311
315
312
316
mkExecResp, err := e.docker.ContainerExecCreate(ctx, addl.container, container.ExecOptions{
313
317
Cmd: []string{"bash", "-c", step.command},
···
327
331
328
332
select {
329
333
case <-tailDone:
330
-
331
334
case <-ctx.Done():
332
335
// cleanup will be handled by DestroyWorkflow, since
333
336
// Docker doesn't provide an API to kill an exec run
334
337
// (sure, we could grab the PID and kill it ourselves,
335
338
// but that's wasted effort)
336
339
e.l.Warn("step timed out", "step", step.Name)
337
-
338
340
<-tailDone
339
-
340
341
return engine.ErrTimedOut
341
342
}
342
343
···
346
347
default:
347
348
}
348
349
349
-
execInspectResp, err := e.docker.ContainerExecInspect(ctx, mkExecResp.ID)
350
+
if err = e.handleStepFailure(ctx, wid, w, idx, mkExecResp.ID); err != nil {
351
+
return err
352
+
}
353
+
354
+
e.l.Info("step completed successfully",
355
+
"workflow_id", wid.String(),
356
+
"step_index", idx,
357
+
"step_name", step.Name,
358
+
)
359
+
360
+
return nil
361
+
}
362
+
363
+
// handleStepFailure logs detailed information about a failed workflow step and returns the matching error; it returns nil when the exec exited with code 0
364
+
func (e *Engine) handleStepFailure(
365
+
ctx context.Context,
366
+
wid models.WorkflowId,
367
+
w *models.Workflow,
368
+
idx int,
369
+
execID string,
370
+
) error {
371
+
addl := w.Data.(addlFields)
372
+
step := w.Steps[idx].(Step)
373
+
374
+
inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
350
375
if err != nil {
351
376
return err
352
377
}
353
378
354
-
if execInspectResp.ExitCode != 0 {
355
-
inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
356
-
if err != nil {
357
-
return err
379
+
execInspectResp, err := e.docker.ContainerExecInspect(ctx, execID)
380
+
if err != nil {
381
+
return err
382
+
}
383
+
384
+
// no error
385
+
if execInspectResp.ExitCode == 0 {
386
+
return nil
387
+
}
388
+
389
+
logFields := []any{
390
+
"workflow_id", wid.String(),
391
+
"step_index", idx,
392
+
"step_name", step.Name,
393
+
"command", step.command,
394
+
"container_exit_code", inspectResp.State.ExitCode,
395
+
"container_oom_killed", inspectResp.State.OOMKilled,
396
+
"exec_exit_code", execInspectResp.ExitCode,
397
+
}
398
+
399
+
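// Add container state information (NOTE(review): State is dereferenced without a nil check both above and below this guard — confirm whether it can be nil and guard consistently)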
400
+
if inspectResp.State != nil {
401
+
logFields = append(logFields,
402
+
"container_status", inspectResp.State.Status,
403
+
"container_running", inspectResp.State.Running,
404
+
"container_paused", inspectResp.State.Paused,
405
+
"container_restarting", inspectResp.State.Restarting,
406
+
"container_dead", inspectResp.State.Dead,
407
+
)
408
+
409
+
if inspectResp.State.Error != "" {
410
+
logFields = append(logFields, "container_error", inspectResp.State.Error)
358
411
}
359
412
360
-
e.l.Error("workflow failed!", "workflow_id", wid.String(), "exit_code", execInspectResp.ExitCode, "oom_killed", inspectResp.State.OOMKilled)
413
+
if inspectResp.State.StartedAt != "" {
414
+
logFields = append(logFields, "container_started_at", inspectResp.State.StartedAt)
415
+
}
361
416
362
-
if inspectResp.State.OOMKilled {
363
-
return ErrOOMKilled
417
+
if inspectResp.State.FinishedAt != "" {
418
+
logFields = append(logFields, "container_finished_at", inspectResp.State.FinishedAt)
364
419
}
365
-
return engine.ErrWorkflowFailed
366
420
}
367
421
368
-
return nil
422
+
// Add the configured memory limit, if one is set
423
+
if inspectResp.HostConfig != nil && inspectResp.HostConfig.Memory > 0 {
424
+
logFields = append(logFields, "memory_limit", inspectResp.HostConfig.Memory)
425
+
}
426
+
427
+
e.l.Error("workflow step failed!", logFields...)
428
+
429
+
if inspectResp.State.OOMKilled {
430
+
return ErrOOMKilled
431
+
}
432
+
return engine.ErrWorkflowFailed
369
433
}
370
434
371
435
func (e *Engine) tailStep(ctx context.Context, wfLogger *models.WorkflowLogger, execID string, wid models.WorkflowId, stepIdx int, step models.Step) error {