Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ type RestoreOptions struct {
// established connections.
TCPEstablished bool

// IgnoreVolumes skips restoring volume content from the archive,
// reusing an existing volume. Leave false for cross-node restore;
// set true for same-node restore-in-place. See
// ProjectRestoreOptions.IgnoreVolumes.
IgnoreVolumes bool

// LocalEnv overrides os.Environ() for the reattached workspace's
// substituter localEnv pass. Nil means use the current process
// environment — matches AttachOptions.LocalEnv. On a cross-node
Expand Down Expand Up @@ -117,6 +123,7 @@ func (e *Engine) Restore(ctx context.Context, opts RestoreOptions) (*Workspace,
ArchivePath: opts.ArchivePath,
Name: opts.Name,
TCPEstablished: opts.TCPEstablished,
IgnoreVolumes: opts.IgnoreVolumes,
})
if err != nil {
return nil, fmt.Errorf("restore: %w", err)
Expand Down
57 changes: 57 additions & 0 deletions checkpoint_project.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package devcontainer
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -63,6 +64,14 @@ type ProjectRestoreOptions struct {
// established connections.
TCPEstablished bool

// IgnoreVolumes skips restoring volume content from the archives,
// reusing whatever volumes already exist. Leave false for a
// cross-node restore (the destination has no volumes, so content
// must come from the archive). Set true for a same-node
// restore-in-place, where the volumes still exist with current data
// and re-extracting them would collide ("volume already exists").
IgnoreVolumes bool

// LocalEnv overrides os.Environ() for the reattached primary
// workspace's substituter (parity with RestoreOptions.LocalEnv).
LocalEnv map[string]string
Expand Down Expand Up @@ -199,11 +208,59 @@ func (e *Engine) RestoreProject(ctx context.Context, opts ProjectRestoreOptions)
return nil, fmt.Errorf("RestoreProject: %w", err)
}

// A cross-node restore lands on a fresh store with no project network.
// The checkpointed containers were attached to <project>_default, and
// libpod restore fails ("network not found") unless it exists first.
// Recreate it before restoring any container, mirroring how
// compose.Orchestrator.Up names + labels it (CreateNetwork is
// idempotent — a label-matching network is reused, so same-node
// restore is unaffected). Custom/extra compose networks aren't yet
// recorded in the manifest — see design/checkpoint-restore-fixes.md.
if _, err := e.runtime.CreateNetwork(ctx, runtime.NetworkSpec{
Name: manifest.Project + "_default",
Labels: map[string]string{
compose.LabelComposeProject: manifest.Project,
compose.LabelEngine: compose.EngineDisplayName,
},
}); err != nil {
return nil, fmt.Errorf("RestoreProject: recreate project network: %w", err)
}

out := &ProjectRestore{Project: manifest.Project, Services: map[string]*runtime.Container{}}
for _, svc := range manifest.Services {
// Restore (--import) re-creates the container under its archived,
// deterministic compose name. On a fresh/cross-node store nothing
// pre-exists. On a same-node restore the checkpoint left the source
// container *stopped* under this name (StopAfter stops, it does not
// remove), which collides with re-create ("that ID is already in
// use"). Clear a non-running leftover — its full state is in the
// archive — but refuse to clobber a *running* container: that would
// be destroying a live service, not restoring it. RemoveVolumes
// stays false so the service's data volume survives for reuse.
name := manifest.Project + "-" + svc.Service + "-1"
d, ierr := e.runtime.InspectContainer(ctx, name)
switch {
case ierr != nil:
// Absent is the normal fresh/cross-node case — proceed. Any
// other inspect failure (daemon/API/permission) is surfaced
// rather than masked behind a downstream restore error.
var notFound *runtime.ContainerNotFoundError
if !errors.As(ierr, &notFound) {
return nil, fmt.Errorf("RestoreProject: service %q: inspect existing container %q: %w", svc.Service, name, ierr)
}
case d != nil:
if d.State == runtime.StateRunning {
return nil, fmt.Errorf("RestoreProject: service %q: a running container %q already exists — stop it before restoring", svc.Service, name)
}
if rerr := e.runtime.RemoveContainer(ctx, name, runtime.RemoveOptions{Force: true}); rerr != nil {
return nil, fmt.Errorf("RestoreProject: service %q: clearing stale container %q: %w", svc.Service, name, rerr)
}
}

c, err := cr.Restore(ctx, runtime.RestoreSpec{
ArchivePath: filepath.Join(opts.ArchiveDir, svc.Archive),
TCPEstablished: opts.TCPEstablished,
IgnoreVolumes: opts.IgnoreVolumes,
})
if err != nil {
return nil, fmt.Errorf("RestoreProject: service %q: %w", svc.Service, err)
Expand Down
7 changes: 7 additions & 0 deletions checkpoint_project_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ func (f *fakeProjectRuntime) Capabilities() runtime.Capabilities {
return c
}

// CreateNetwork always succeeds on the checkpoint-capable fake.
// RestoreProject recreates the project network before it restores any
// container (the cross-node, fresh-store path), so the fake has to
// accept the call. The returned ID is "net-" followed by the requested
// network name.
func (f *fakeProjectRuntime) CreateNetwork(ctx context.Context, spec runtime.NetworkSpec) (string, error) {
	id := "net-" + spec.Name
	return id, nil
}

func (f *fakeProjectRuntime) ListContainers(ctx context.Context, filter runtime.LabelFilter) ([]runtime.Container, error) {
f.fakeRuntime.mu.Lock()
defer f.fakeRuntime.mu.Unlock()
Expand Down
170 changes: 137 additions & 33 deletions compose/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"errors"
"fmt"
"sort"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -82,15 +83,36 @@ type Orchestrator struct {
// PollInterval is the cadence for health polling. Tests
// override; production default below.
PollInterval time.Duration

// selfProbe is true when the backend asks the orchestrator to run
// healthcheck probes itself (via Exec) instead of configuring the
// backend's native HEALTHCHECK — see selfHealthProber and
// design/compose-native-health.md. Computed once at construction.
selfProbe bool
}

// selfHealthProber is an optional runtime capability: a backend that
// implements it and returns true asks the orchestrator to drive
// healthcheck probes itself rather than relying on the backend's native
// HEALTHCHECK. Podman implements it — its native healthcheck runs eagerly
// as root and races privilege-dropping images. Docker and Apple do not,
// keeping their existing behavior.
//
// NewOrchestrator detects the capability with a type assertion on the
// runtime, so backends opt in simply by defining the method.
type selfHealthProber interface {
// PreferSelfProbedHealth reports whether the orchestrator should run
// healthcheck probes itself (via Exec) instead of configuring the
// backend's native HEALTHCHECK.
PreferSelfProbedHealth() bool
}

// NewOrchestrator builds an Orchestrator around rt with the default
// health timeout and a 500ms health-poll interval. If rt implements
// selfHealthProber and opts in, the orchestrator is flagged to run
// healthcheck probes itself; the decision is made once, here.
func NewOrchestrator(rt runtime.Runtime, backendName string) *Orchestrator {
	o := &Orchestrator{
		rt:            rt,
		BackendName:   backendName,
		HealthTimeout: DefaultHealthTimeout,
		PollInterval:  500 * time.Millisecond,
	}
	// Backends without the optional capability leave selfProbe false.
	if prober, ok := rt.(selfHealthProber); ok && prober.PreferSelfProbedHealth() {
		o.selfProbe = true
	}
	return o
}

Expand Down Expand Up @@ -481,6 +503,14 @@ func (o *Orchestrator) ensureService(
}

spec := serviceToRunSpec(plan, svc, projectLabels, hash, imageDigest)
if o.selfProbe {
// The orchestrator probes health itself (see waitFor); explicitly
// DISABLE the backend's native HEALTHCHECK. Nil would mean "inherit
// from image" (toHealthcheck), so an image-baked HEALTHCHECK would
// still run natively — reintroducing Podman's eager root probe that
// breaks privilege-dropping images. Disable emits the NONE sentinel.
spec.HealthCheck = &runtime.HealthCheckSpec{Disable: true}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
c, err := o.rt.RunContainer(ctx, spec)
if err != nil {
// Compose's `up -d` pulls missing images implicitly. Mirror
Expand Down Expand Up @@ -582,7 +612,11 @@ func (o *Orchestrator) gateLevel(
if cid == "" {
continue
}
if err := o.waitFor(ctx, svcName, cid, req.condition, deadline); err != nil {
var hc *runtime.HealthCheckSpec
if svc, ok := plan.Project.Services[svcName]; ok {
hc = healthCheckOf(svc.HealthCheck)
}
if err := o.waitFor(ctx, svcName, cid, req.condition, hc, deadline); err != nil {
if req.optional {
// Per compose spec: a non-required dependency that
// fails to satisfy its condition does not block the
Expand All @@ -596,46 +630,63 @@ func (o *Orchestrator) gateLevel(
return nil
}

// waitFor polls a service's condition until satisfied or deadline.
// waitFor polls a service's condition until satisfied or deadline. When
// o.selfProbe is set, a service_healthy gate is evaluated by executing the
// service's healthcheck command (hc) via Exec rather than reading the
// backend's native health status — see selfHealthProber and
// design/compose-native-health.md. The completion condition always reads
// container state (no native healthcheck required either way).
func (o *Orchestrator) waitFor(
ctx context.Context, svc, id, cond string, deadline time.Time,
ctx context.Context, svc, id, cond string, hc *runtime.HealthCheckSpec, deadline time.Time,
) error {
// In self-probe mode, honor start_period as a grace delay before the
// first probe so an eager probe can't run before the service inits.
probeNotBefore := time.Now()
if o.selfProbe && hc != nil {
probeNotBefore = probeNotBefore.Add(hc.StartPeriod)
}
for {
if err := ctx.Err(); err != nil {
return err
}
details, err := o.rt.InspectContainer(ctx, id)
if err == nil && details != nil {
switch cond {
case "service_healthy":
// Treat HealthNone as satisfied: a container with
// no HEALTHCHECK directive can still be a
// service_healthy gate target (compose's behavior),
// so falling back to State=Running keeps that case
// working. For services that DO declare a
// healthcheck, require Healthy explicitly.
switch details.Health {
case runtime.HealthHealthy:
return nil
case runtime.HealthNone:
if details.State == runtime.StateRunning {
if o.selfProbe && cond == "service_healthy" {
if o.probeHealthy(ctx, id, hc, probeNotBefore) {
return nil
}
} else {
details, err := o.rt.InspectContainer(ctx, id)
if err == nil && details != nil {
switch cond {
case "service_healthy":
// Treat HealthNone as satisfied: a container with
// no HEALTHCHECK directive can still be a
// service_healthy gate target (compose's behavior),
// so falling back to State=Running keeps that case
// working. For services that DO declare a
// healthcheck, require Healthy explicitly.
switch details.Health {
case runtime.HealthHealthy:
return nil
case runtime.HealthNone:
if details.State == runtime.StateRunning {
return nil
}
case runtime.HealthUnhealthy:
return fmt.Errorf(
"compose: service %q reported unhealthy while waiting on service_healthy",
svc,
)
}
case "service_completed_successfully":
if details.State == runtime.StateExited && details.ExitCode == 0 {
return nil
}
if details.State == runtime.StateExited && details.ExitCode != 0 {
return fmt.Errorf(
"compose: %s exited with code %d while waiting for completion",
svc, details.ExitCode,
)
}
case runtime.HealthUnhealthy:
return fmt.Errorf(
"compose: service %q reported unhealthy while waiting on service_healthy",
svc,
)
}
case "service_completed_successfully":
if details.State == runtime.StateExited && details.ExitCode == 0 {
return nil
}
if details.State == runtime.StateExited && details.ExitCode != 0 {
return fmt.Errorf(
"compose: %s exited with code %d while waiting for completion",
svc, details.ExitCode,
)
}
}
}
Expand All @@ -654,6 +705,59 @@ func (o *Orchestrator) waitFor(
}
}

// probeHealthy executes the service's compose healthcheck a single time
// via Exec and reports whether it passes right now.
//
// When healthProbeCmd yields no command (nil, disabled, NONE or empty
// healthcheck) the answer is simply "is the container running?", which
// mirrors the native path's HealthNone handling. While the start_period
// grace is still in effect (the current time is before notBefore) it
// reports false without probing, deferring the first probe until the
// service has had time to initialize. Any Exec error, or a non-zero
// exit code, counts as not healthy.
func (o *Orchestrator) probeHealthy(
	ctx context.Context, id string, hc *runtime.HealthCheckSpec, notBefore time.Time,
) bool {
	argv := healthProbeCmd(hc)
	if argv == nil {
		// No probe to run: fall back to the container's running state.
		info, err := o.rt.InspectContainer(ctx, id)
		if err != nil || info == nil {
			return false
		}
		return info.State == runtime.StateRunning
	}

	// Still inside the start_period grace window: do not probe yet.
	if notBefore.After(time.Now()) {
		return false
	}

	// A non-nil argv implies hc is non-nil, so hc.Timeout is safe to read.
	execCtx := ctx
	if hc.Timeout > 0 {
		bounded, stop := context.WithTimeout(ctx, hc.Timeout)
		defer stop()
		execCtx = bounded
	}

	result, err := o.rt.ExecContainer(execCtx, id, runtime.ExecOptions{Cmd: argv})
	if err != nil {
		return false
	}
	return result.ExitCode == 0
}

// healthProbeCmd translates a compose-normalized healthcheck Test into
// the argv to hand to Exec. It returns nil when there is nothing to run:
// a nil or disabled spec, an empty Test, a NONE test, or a CMD/CMD-SHELL
// token with no payload after it.
//
// Compose normalizes Test so its first element is CMD, CMD-SHELL or
// NONE:
//   - CMD:       the remaining elements are the argv (returned as a copy).
//   - CMD-SHELL: the second element is run through /bin/sh -c.
//   - otherwise: the whole Test is space-joined and run through
//     /bin/sh -c, a defensive fallback for a bare-string form.
func healthProbeCmd(hc *runtime.HealthCheckSpec) []string {
	if hc == nil || hc.Disable {
		return nil
	}
	test := hc.Test
	if len(test) == 0 {
		return nil
	}

	kind, args := test[0], test[1:]
	switch kind {
	case "NONE":
		return nil
	case "CMD":
		if len(args) == 0 {
			return nil
		}
		// Copy so the caller cannot alias the spec's backing array.
		argv := make([]string, len(args))
		copy(argv, args)
		return argv
	case "CMD-SHELL":
		if len(args) == 0 {
			return nil
		}
		return []string{"/bin/sh", "-c", args[0]}
	}
	return []string{"/bin/sh", "-c", strings.Join(test, " ")}
}

// serviceToRunSpec is the in-memory translation from compose's
// ServiceConfig to runtime.RunSpec. This is intentionally minimal
// for C6 — env / labels / mounts / command / entrypoint / user /
Expand Down
Loading
Loading