forked from tangled.org/core
this repo has no description

spindle/engine: setup and destroy workflows

During setup, we register cleanup functions that are executed at the
end of each workflow goroutine (via a deferred call to DestroyWorkflow).

Signed-off-by: Anirudh Oppiliappan <anirudh@tangled.sh>

anirudh.fi 96c52925 1a84fc09

verified
Changed files
+129 -47
spindle
+125 -45
spindle/engine/engine.go
··· 8 8 "log/slog" 9 9 "os" 10 10 "path" 11 + "strings" 11 12 "sync" 13 + "syscall" 12 14 13 15 "github.com/docker/docker/api/types/container" 14 16 "github.com/docker/docker/api/types/image" ··· 28 30 workspaceDir = "/tangled/workspace" 29 31 ) 30 32 33 + type cleanupFunc func(context.Context) error 34 + 31 35 type Engine struct { 32 36 docker client.APIClient 33 37 l *slog.Logger ··· 37 41 chanMu sync.RWMutex 38 42 stdoutChans map[string]chan string 39 43 stderrChans map[string]chan string 44 + 45 + cleanupMu sync.Mutex 46 + cleanup map[string][]cleanupFunc 40 47 } 41 48 42 49 func New(ctx context.Context, db *db.DB, n *notifier.Notifier) (*Engine, error) { ··· 57 64 e.stdoutChans = make(map[string]chan string, 100) 58 65 e.stderrChans = make(map[string]chan string, 100) 59 66 67 + e.cleanup = make(map[string][]cleanupFunc) 68 + 60 69 return e, nil 61 70 } 62 71 63 - // SetupPipeline sets up a new network for the pipeline, and possibly volumes etc. 64 - // in the future. In here also goes other setup steps. 
65 - func (e *Engine) SetupPipeline(ctx context.Context, pipeline *tangled.Pipeline, atUri, id string) error { 66 - e.l.Info("setting up pipeline", "pipeline", id) 67 - 68 - _, err := e.docker.VolumeCreate(ctx, volume.CreateOptions{ 69 - Name: workspaceVolume(id), 70 - Driver: "local", 71 - }) 72 - if err != nil { 73 - return err 74 - } 75 - 76 - _, err = e.docker.VolumeCreate(ctx, volume.CreateOptions{ 77 - Name: nixVolume(id), 78 - Driver: "local", 79 - }) 80 - if err != nil { 81 - return err 82 - } 83 - 84 - _, err = e.docker.NetworkCreate(ctx, pipelineName(id), network.CreateOptions{ 85 - Driver: "bridge", 86 - }) 87 - if err != nil { 88 - return err 89 - } 90 - 91 - err = e.db.CreatePipeline(id, atUri, e.n) 92 - return err 93 - } 94 - 95 72 func (e *Engine) StartWorkflows(ctx context.Context, pipeline *tangled.Pipeline, id string) error { 96 73 e.l.Info("starting all workflows in parallel", "pipeline", id) 97 74 ··· 103 80 g := errgroup.Group{} 104 81 for _, w := range pipeline.Workflows { 105 82 g.Go(func() error { 83 + err := e.SetupWorkflow(ctx, id, w.Name) 84 + if err != nil { 85 + return err 86 + } 87 + 88 + defer e.DestroyWorkflow(ctx, id, w.Name) 89 + 106 90 // TODO: actual checks for image/registry etc. 107 91 var deps string 108 92 for _, d := range w.Dependencies { ··· 127 111 defer reader.Close() 128 112 io.Copy(os.Stdout, reader) 129 113 130 - err = e.StartSteps(ctx, w.Steps, id, cimg) 114 + err = e.StartSteps(ctx, w.Steps, w.Name, id, cimg) 131 115 if err != nil { 132 116 e.l.Error("pipeline failed!", "id", id, "error", err.Error()) 133 117 return e.db.MarkPipelineFailed(id, -1, err.Error(), e.n) ··· 147 131 return e.db.MarkPipelineSuccess(id, e.n) 148 132 } 149 133 134 + // SetupWorkflow sets up a new network for the workflow and volumes for 135 + // the workspace and Nix store. These are persisted across steps and are 136 + // destroyed at the end of the workflow. 
137 + func (e *Engine) SetupWorkflow(ctx context.Context, id, workflowName string) error { 138 + e.l.Info("setting up workflow", "pipeline", id, "workflow", workflowName) 139 + 140 + _, err := e.docker.VolumeCreate(ctx, volume.CreateOptions{ 141 + Name: workspaceVolume(id, workflowName), 142 + Driver: "local", 143 + }) 144 + if err != nil { 145 + return err 146 + } 147 + e.registerCleanup(id, workflowName, func(ctx context.Context) error { 148 + return e.docker.VolumeRemove(ctx, workspaceVolume(id, workflowName), true) 149 + }) 150 + 151 + _, err = e.docker.VolumeCreate(ctx, volume.CreateOptions{ 152 + Name: nixVolume(id, workflowName), 153 + Driver: "local", 154 + }) 155 + if err != nil { 156 + return err 157 + } 158 + e.registerCleanup(id, workflowName, func(ctx context.Context) error { 159 + return e.docker.VolumeRemove(ctx, nixVolume(id, workflowName), true) 160 + }) 161 + 162 + _, err = e.docker.NetworkCreate(ctx, networkName(id, workflowName), network.CreateOptions{ 163 + Driver: "bridge", 164 + }) 165 + if err != nil { 166 + return err 167 + } 168 + e.registerCleanup(id, workflowName, func(ctx context.Context) error { 169 + return e.docker.NetworkRemove(ctx, networkName(id, workflowName)) 170 + }) 171 + 172 + return nil 173 + } 174 + 150 175 // StartSteps starts all steps sequentially with the same base image. 151 176 // ONLY marks pipeline as failed if container's exit code is non-zero. 152 177 // All other errors are bubbled up. 
153 - func (e *Engine) StartSteps(ctx context.Context, steps []*tangled.Pipeline_Step, id, image string) error { 178 + func (e *Engine) StartSteps(ctx context.Context, steps []*tangled.Pipeline_Step, workflowName, id, image string) error { 154 179 // set up logging channels 155 180 e.chanMu.Lock() 156 181 if _, exists := e.stdoutChans[id]; !exists { ··· 168 193 }() 169 194 170 195 for _, step := range steps { 171 - hostConfig := hostConfig(id) 196 + hostConfig := hostConfig(id, workflowName) 172 197 resp, err := e.docker.ContainerCreate(ctx, &container.Config{ 173 198 Image: image, 174 199 Cmd: []string{"bash", "-c", step.Command}, ··· 181 206 return fmt.Errorf("creating container: %w", err) 182 207 } 183 208 184 - err = e.docker.NetworkConnect(ctx, pipelineName(id), resp.ID, nil) 209 + err = e.docker.NetworkConnect(ctx, networkName(id, workflowName), resp.ID, nil) 185 210 if err != nil { 186 211 return fmt.Errorf("connecting network: %w", err) 187 212 } ··· 208 233 wg.Wait() 209 234 210 235 state, err := e.WaitStep(ctx, resp.ID) 236 + if err != nil { 237 + return err 238 + } 239 + 240 + err = e.DestroyStep(ctx, resp.ID, id) 211 241 if err != nil { 212 242 return err 213 243 } ··· 310 340 return nil 311 341 } 312 342 343 + func (e *Engine) DestroyStep(ctx context.Context, containerID, pipelineID string) error { 344 + err := e.docker.ContainerKill(ctx, containerID, syscall.SIGKILL.String()) 345 + if err != nil && !isErrContainerNotFoundOrNotRunning(err) { 346 + return err 347 + } 348 + 349 + if err := e.docker.ContainerRemove(ctx, containerID, container.RemoveOptions{ 350 + RemoveVolumes: true, 351 + RemoveLinks: false, 352 + Force: false, 353 + }); err != nil && !isErrContainerNotFoundOrNotRunning(err) { 354 + return err 355 + } 356 + 357 + return nil 358 + } 359 + 360 + func (e *Engine) DestroyWorkflow(ctx context.Context, pipelineID, workflowName string) error { 361 + e.cleanupMu.Lock() 362 + key := fmt.Sprintf("%s-%s", pipelineID, workflowName) 363 + 364 + fns 
:= e.cleanup[key] 365 + delete(e.cleanup, key) 366 + e.cleanupMu.Unlock() 367 + 368 + for _, fn := range fns { 369 + if err := fn(ctx); err != nil { 370 + e.l.Error("failed to cleanup workflow resource", "pipeline", pipelineID, "workflow", workflowName, "err", err) 371 + } 372 + } 373 + return nil 374 + } 375 + 313 376 func (e *Engine) LogChannels(pipelineID string) (stdout <-chan string, stderr <-chan string, ok bool) { 314 377 e.chanMu.RLock() 315 378 defer e.chanMu.RUnlock() ··· 323 386 return stdoutCh, stderrCh, true 324 387 } 325 388 326 - func workspaceVolume(id string) string { 327 - return "workspace-" + id 389 + func (e *Engine) registerCleanup(pipelineID, workflowName string, fn cleanupFunc) { 390 + e.cleanupMu.Lock() 391 + defer e.cleanupMu.Unlock() 392 + 393 + key := fmt.Sprintf("%s-%s", pipelineID, workflowName) 394 + e.cleanup[key] = append(e.cleanup[key], fn) 328 395 } 329 396 330 - func nixVolume(id string) string { 331 - return "nix-" + id 397 + func workspaceVolume(id, name string) string { 398 + return fmt.Sprintf("workspace-%s-%s", id, name) 332 399 } 333 400 334 - func pipelineName(id string) string { 335 - return "pipeline-" + id 401 + func nixVolume(id, name string) string { 402 + return fmt.Sprintf("nix-%s-%s", id, name) 403 + } 404 + 405 + func networkName(id, name string) string { 406 + return fmt.Sprintf("workflow-network-%s-%s", id, name) 336 407 } 337 408 338 - func hostConfig(id string) *container.HostConfig { 409 + func hostConfig(id, name string) *container.HostConfig { 339 410 hostConfig := &container.HostConfig{ 340 411 Mounts: []mount.Mount{ 341 412 { 342 413 Type: mount.TypeVolume, 343 - Source: workspaceVolume(id), 414 + Source: workspaceVolume(id, name), 344 415 Target: workspaceDir, 345 416 }, 346 417 { 347 418 Type: mount.TypeVolume, 348 - Source: nixVolume(id), 419 + Source: nixVolume(id, name), 349 420 Target: "/nix", 350 421 }, 351 422 }, ··· 356 427 357 428 return hostConfig 358 429 } 430 + 431 + // thanks woodpecker 432 
+ func isErrContainerNotFoundOrNotRunning(err error) bool { 433 + // Error response from daemon: Cannot kill container: ...: No such container: ... 434 + // Error response from daemon: Cannot kill container: ...: Container ... is not running" 435 + // Error response from podman daemon: can only kill running containers. ... is in state exited 436 + // Error: No such container: ... 437 + return err != nil && (strings.Contains(err.Error(), "No such container") || strings.Contains(err.Error(), "is not running") || strings.Contains(err.Error(), "can only kill running containers")) 438 + }
+4 -2
spindle/server.go
··· 122 122 pipelineAtUri := fmt.Sprintf("at://%s/did:web:%s/%s", tangled.PipelineNSID, pipeline.TriggerMetadata.Repo.Knot, msg.Rkey) 123 123 124 124 rkey := TID() 125 - err = s.eng.SetupPipeline(ctx, &pipeline, pipelineAtUri, rkey) 125 + 126 + err = s.db.CreatePipeline(rkey, pipelineAtUri, s.n) 126 127 if err != nil { 127 128 return err 128 129 } 130 + 129 131 return s.eng.StartWorkflows(ctx, &pipeline, rkey) 130 132 }, 131 133 OnFail: func(error) { 132 - s.l.Error("pipeline setup failed", "error", err) 134 + s.l.Error("pipeline run failed", "error", err) 133 135 }, 134 136 }) 135 137 if ok {