+125
-45
spindle/engine/engine.go
+125
-45
spindle/engine/engine.go
···
8
8
"log/slog"
9
9
"os"
10
10
"path"
11
+
"strings"
11
12
"sync"
13
+
"syscall"
12
14
13
15
"github.com/docker/docker/api/types/container"
14
16
"github.com/docker/docker/api/types/image"
···
28
30
workspaceDir = "/tangled/workspace"
29
31
)
30
32
33
+
// cleanupFunc tears down a single workflow resource (e.g. a volume or
// network); it is registered during setup and run by DestroyWorkflow.
type cleanupFunc func(context.Context) error
34
+
31
35
type Engine struct {
32
36
docker client.APIClient
33
37
l *slog.Logger
···
37
41
chanMu sync.RWMutex
38
42
stdoutChans map[string]chan string
39
43
stderrChans map[string]chan string
44
+
45
+
cleanupMu sync.Mutex
46
+
cleanup map[string][]cleanupFunc
40
47
}
41
48
42
49
func New(ctx context.Context, db *db.DB, n *notifier.Notifier) (*Engine, error) {
···
57
64
e.stdoutChans = make(map[string]chan string, 100)
58
65
e.stderrChans = make(map[string]chan string, 100)
59
66
67
+
e.cleanup = make(map[string][]cleanupFunc)
68
+
60
69
return e, nil
61
70
}
62
71
63
-
// SetupPipeline sets up a new network for the pipeline, and possibly volumes etc.
64
-
// in the future. In here also goes other setup steps.
65
-
func (e *Engine) SetupPipeline(ctx context.Context, pipeline *tangled.Pipeline, atUri, id string) error {
66
-
e.l.Info("setting up pipeline", "pipeline", id)
67
-
68
-
_, err := e.docker.VolumeCreate(ctx, volume.CreateOptions{
69
-
Name: workspaceVolume(id),
70
-
Driver: "local",
71
-
})
72
-
if err != nil {
73
-
return err
74
-
}
75
-
76
-
_, err = e.docker.VolumeCreate(ctx, volume.CreateOptions{
77
-
Name: nixVolume(id),
78
-
Driver: "local",
79
-
})
80
-
if err != nil {
81
-
return err
82
-
}
83
-
84
-
_, err = e.docker.NetworkCreate(ctx, pipelineName(id), network.CreateOptions{
85
-
Driver: "bridge",
86
-
})
87
-
if err != nil {
88
-
return err
89
-
}
90
-
91
-
err = e.db.CreatePipeline(id, atUri, e.n)
92
-
return err
93
-
}
94
-
95
72
func (e *Engine) StartWorkflows(ctx context.Context, pipeline *tangled.Pipeline, id string) error {
96
73
e.l.Info("starting all workflows in parallel", "pipeline", id)
97
74
···
103
80
g := errgroup.Group{}
104
81
for _, w := range pipeline.Workflows {
105
82
g.Go(func() error {
83
+
err := e.SetupWorkflow(ctx, id, w.Name)
84
+
if err != nil {
85
+
return err
86
+
}
87
+
88
+
defer e.DestroyWorkflow(ctx, id, w.Name)
89
+
106
90
// TODO: actual checks for image/registry etc.
107
91
var deps string
108
92
for _, d := range w.Dependencies {
···
127
111
defer reader.Close()
128
112
io.Copy(os.Stdout, reader)
129
113
130
-
err = e.StartSteps(ctx, w.Steps, id, cimg)
114
+
err = e.StartSteps(ctx, w.Steps, w.Name, id, cimg)
131
115
if err != nil {
132
116
e.l.Error("pipeline failed!", "id", id, "error", err.Error())
133
117
return e.db.MarkPipelineFailed(id, -1, err.Error(), e.n)
···
147
131
return e.db.MarkPipelineSuccess(id, e.n)
148
132
}
149
133
134
+
// SetupWorkflow sets up a new network for the workflow and volumes for
135
+
// the workspace and Nix store. These are persisted across steps and are
136
+
// destroyed at the end of the workflow.
137
+
func (e *Engine) SetupWorkflow(ctx context.Context, id, workflowName string) error {
138
+
e.l.Info("setting up workflow", "pipeline", id, "workflow", workflowName)
139
+
140
+
_, err := e.docker.VolumeCreate(ctx, volume.CreateOptions{
141
+
Name: workspaceVolume(id, workflowName),
142
+
Driver: "local",
143
+
})
144
+
if err != nil {
145
+
return err
146
+
}
147
+
e.registerCleanup(id, workflowName, func(ctx context.Context) error {
148
+
return e.docker.VolumeRemove(ctx, workspaceVolume(id, workflowName), true)
149
+
})
150
+
151
+
_, err = e.docker.VolumeCreate(ctx, volume.CreateOptions{
152
+
Name: nixVolume(id, workflowName),
153
+
Driver: "local",
154
+
})
155
+
if err != nil {
156
+
return err
157
+
}
158
+
e.registerCleanup(id, workflowName, func(ctx context.Context) error {
159
+
return e.docker.VolumeRemove(ctx, nixVolume(id, workflowName), true)
160
+
})
161
+
162
+
_, err = e.docker.NetworkCreate(ctx, networkName(id, workflowName), network.CreateOptions{
163
+
Driver: "bridge",
164
+
})
165
+
if err != nil {
166
+
return err
167
+
}
168
+
e.registerCleanup(id, workflowName, func(ctx context.Context) error {
169
+
return e.docker.NetworkRemove(ctx, networkName(id, workflowName))
170
+
})
171
+
172
+
return nil
173
+
}
174
+
150
175
// StartSteps starts all steps sequentially with the same base image.
151
176
// ONLY marks pipeline as failed if container's exit code is non-zero.
152
177
// All other errors are bubbled up.
153
-
func (e *Engine) StartSteps(ctx context.Context, steps []*tangled.Pipeline_Step, id, image string) error {
178
+
func (e *Engine) StartSteps(ctx context.Context, steps []*tangled.Pipeline_Step, workflowName, id, image string) error {
154
179
// set up logging channels
155
180
e.chanMu.Lock()
156
181
if _, exists := e.stdoutChans[id]; !exists {
···
168
193
}()
169
194
170
195
for _, step := range steps {
171
-
hostConfig := hostConfig(id)
196
+
hostConfig := hostConfig(id, workflowName)
172
197
resp, err := e.docker.ContainerCreate(ctx, &container.Config{
173
198
Image: image,
174
199
Cmd: []string{"bash", "-c", step.Command},
···
181
206
return fmt.Errorf("creating container: %w", err)
182
207
}
183
208
184
-
err = e.docker.NetworkConnect(ctx, pipelineName(id), resp.ID, nil)
209
+
err = e.docker.NetworkConnect(ctx, networkName(id, workflowName), resp.ID, nil)
185
210
if err != nil {
186
211
return fmt.Errorf("connecting network: %w", err)
187
212
}
···
208
233
wg.Wait()
209
234
210
235
state, err := e.WaitStep(ctx, resp.ID)
236
+
if err != nil {
237
+
return err
238
+
}
239
+
240
+
err = e.DestroyStep(ctx, resp.ID, id)
211
241
if err != nil {
212
242
return err
213
243
}
···
310
340
return nil
311
341
}
312
342
343
+
func (e *Engine) DestroyStep(ctx context.Context, containerID, pipelineID string) error {
344
+
err := e.docker.ContainerKill(ctx, containerID, syscall.SIGKILL.String())
345
+
if err != nil && !isErrContainerNotFoundOrNotRunning(err) {
346
+
return err
347
+
}
348
+
349
+
if err := e.docker.ContainerRemove(ctx, containerID, container.RemoveOptions{
350
+
RemoveVolumes: true,
351
+
RemoveLinks: false,
352
+
Force: false,
353
+
}); err != nil && !isErrContainerNotFoundOrNotRunning(err) {
354
+
return err
355
+
}
356
+
357
+
return nil
358
+
}
359
+
360
+
func (e *Engine) DestroyWorkflow(ctx context.Context, pipelineID, workflowName string) error {
361
+
e.cleanupMu.Lock()
362
+
key := fmt.Sprintf("%s-%s", pipelineID, workflowName)
363
+
364
+
fns := e.cleanup[key]
365
+
delete(e.cleanup, key)
366
+
e.cleanupMu.Unlock()
367
+
368
+
for _, fn := range fns {
369
+
if err := fn(ctx); err != nil {
370
+
e.l.Error("failed to cleanup workflow resource", "pipeline", pipelineID, "workflow", workflowName, "err", err)
371
+
}
372
+
}
373
+
return nil
374
+
}
375
+
313
376
func (e *Engine) LogChannels(pipelineID string) (stdout <-chan string, stderr <-chan string, ok bool) {
314
377
e.chanMu.RLock()
315
378
defer e.chanMu.RUnlock()
···
323
386
return stdoutCh, stderrCh, true
324
387
}
325
388
326
-
func workspaceVolume(id string) string {
327
-
return "workspace-" + id
389
+
func (e *Engine) registerCleanup(pipelineID, workflowName string, fn cleanupFunc) {
390
+
e.cleanupMu.Lock()
391
+
defer e.cleanupMu.Unlock()
392
+
393
+
key := fmt.Sprintf("%s-%s", pipelineID, workflowName)
394
+
e.cleanup[key] = append(e.cleanup[key], fn)
328
395
}
329
396
330
-
func nixVolume(id string) string {
331
-
return "nix-" + id
397
+
// workspaceVolume returns the Docker volume name holding the workspace
// for the given pipeline id and workflow name.
func workspaceVolume(id, name string) string {
	return "workspace-" + id + "-" + name
}
333
400
334
-
func pipelineName(id string) string {
335
-
return "pipeline-" + id
401
+
// nixVolume returns the Docker volume name holding the Nix store for
// the given pipeline id and workflow name.
func nixVolume(id, name string) string {
	return "nix-" + id + "-" + name
}
404
+
405
+
// networkName returns the Docker bridge-network name for the given
// pipeline id and workflow name.
func networkName(id, name string) string {
	return "workflow-network-" + id + "-" + name
}
337
408
338
-
func hostConfig(id string) *container.HostConfig {
409
+
func hostConfig(id, name string) *container.HostConfig {
339
410
hostConfig := &container.HostConfig{
340
411
Mounts: []mount.Mount{
341
412
{
342
413
Type: mount.TypeVolume,
343
-
Source: workspaceVolume(id),
414
+
Source: workspaceVolume(id, name),
344
415
Target: workspaceDir,
345
416
},
346
417
{
347
418
Type: mount.TypeVolume,
348
-
Source: nixVolume(id),
419
+
Source: nixVolume(id, name),
349
420
Target: "/nix",
350
421
},
351
422
},
···
356
427
357
428
return hostConfig
358
429
}
430
+
431
+
// thanks woodpecker
432
+
func isErrContainerNotFoundOrNotRunning(err error) bool {
433
+
// Error response from daemon: Cannot kill container: ...: No such container: ...
434
+
// Error response from daemon: Cannot kill container: ...: Container ... is not running"
435
+
// Error response from podman daemon: can only kill running containers. ... is in state exited
436
+
// Error: No such container: ...
437
+
return err != nil && (strings.Contains(err.Error(), "No such container") || strings.Contains(err.Error(), "is not running") || strings.Contains(err.Error(), "can only kill running containers"))
438
+
}
+4
-2
spindle/server.go
+4
-2
spindle/server.go
···
122
122
pipelineAtUri := fmt.Sprintf("at://%s/did:web:%s/%s", tangled.PipelineNSID, pipeline.TriggerMetadata.Repo.Knot, msg.Rkey)
123
123
124
124
rkey := TID()
125
-
err = s.eng.SetupPipeline(ctx, &pipeline, pipelineAtUri, rkey)
125
+
126
+
err = s.db.CreatePipeline(rkey, pipelineAtUri, s.n)
126
127
if err != nil {
127
128
return err
128
129
}
130
+
129
131
return s.eng.StartWorkflows(ctx, &pipeline, rkey)
130
132
},
131
133
OnFail: func(error) {
132
-
s.l.Error("pipeline setup failed", "error", err)
134
+
s.l.Error("pipeline run failed", "error", err)
133
135
},
134
136
})
135
137
if ok {