diff --git a/checkpoint.go b/checkpoint.go
index 3f8f407..27f74b9 100644
--- a/checkpoint.go
+++ b/checkpoint.go
@@ -38,6 +38,12 @@ type RestoreOptions struct {
 	// established connections.
 	TCPEstablished bool
 
+	// IgnoreVolumes skips restoring volume content from the archive,
+	// reusing an existing volume. Leave false for cross-node restore;
+	// set true for same-node restore-in-place. See
+	// ProjectRestoreOptions.IgnoreVolumes.
+	IgnoreVolumes bool
+
 	// LocalEnv overrides os.Environ() for the reattached workspace's
 	// substituter localEnv pass. Nil means use the current process
 	// environment — matches AttachOptions.LocalEnv. On a cross-node
@@ -117,6 +123,7 @@ func (e *Engine) Restore(ctx context.Context, opts RestoreOptions) (*Workspace,
 		ArchivePath:    opts.ArchivePath,
 		Name:           opts.Name,
 		TCPEstablished: opts.TCPEstablished,
+		IgnoreVolumes:  opts.IgnoreVolumes,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("restore: %w", err)
diff --git a/checkpoint_project.go b/checkpoint_project.go
index edf951f..b273d96 100644
--- a/checkpoint_project.go
+++ b/checkpoint_project.go
@@ -3,6 +3,7 @@ package devcontainer
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -63,6 +64,14 @@ type ProjectRestoreOptions struct {
 	// established connections.
 	TCPEstablished bool
 
+	// IgnoreVolumes skips restoring volume content from the archives,
+	// reusing whatever volumes already exist. Leave false for a
+	// cross-node restore (the destination has no volumes, so content
+	// must come from the archive). Set true for a same-node
+	// restore-in-place, where the volumes still exist with current data
+	// and re-extracting them would collide ("volume already exists").
+	IgnoreVolumes bool
+
 	// LocalEnv overrides os.Environ() for the reattached primary
 	// workspace's substituter (parity with RestoreOptions.LocalEnv).
 	LocalEnv map[string]string
@@ -199,11 +208,59 @@ func (e *Engine) RestoreProject(ctx context.Context, opts ProjectRestoreOptions)
 		return nil, fmt.Errorf("RestoreProject: %w", err)
 	}
 
+	// A cross-node restore lands on a fresh store with no project network.
+	// The checkpointed containers were attached to <project>_default, and
+	// libpod restore fails ("network not found") unless it exists first.
+	// Recreate it before restoring any container, mirroring how
+	// compose.Orchestrator.Up names + labels it (CreateNetwork is
+	// idempotent — a label-matching network is reused, so same-node
+	// restore is unaffected). Custom/extra compose networks aren't yet
+	// recorded in the manifest — see design/checkpoint-restore-fixes.md.
+	if _, err := e.runtime.CreateNetwork(ctx, runtime.NetworkSpec{
+		Name: manifest.Project + "_default",
+		Labels: map[string]string{
+			compose.LabelComposeProject: manifest.Project,
+			compose.LabelEngine:         compose.EngineDisplayName,
+		},
+	}); err != nil {
+		return nil, fmt.Errorf("RestoreProject: recreate project network: %w", err)
+	}
+
 	out := &ProjectRestore{Project: manifest.Project, Services: map[string]*runtime.Container{}}
 	for _, svc := range manifest.Services {
+		// Restore (--import) re-creates the container under its archived,
+		// deterministic compose name. On a fresh/cross-node store nothing
+		// pre-exists. On a same-node restore the checkpoint left the source
+		// container *stopped* under this name (StopAfter stops, it does not
+		// remove), which collides with re-create ("that ID is already in
+		// use"). Clear a non-running leftover — its full state is in the
+		// archive — but refuse to clobber a *running* container: that would
+		// be destroying a live service, not restoring it. RemoveVolumes
+		// stays false so the service's data volume survives for reuse.
+		name := manifest.Project + "-" + svc.Service + "-1"
+		d, ierr := e.runtime.InspectContainer(ctx, name)
+		switch {
+		case ierr != nil:
+			// Absent is the normal fresh/cross-node case — proceed. Any
+			// other inspect failure (daemon/API/permission) is surfaced
+			// rather than masked behind a downstream restore error.
+			var notFound *runtime.ContainerNotFoundError
+			if !errors.As(ierr, &notFound) {
+				return nil, fmt.Errorf("RestoreProject: service %q: inspect existing container %q: %w", svc.Service, name, ierr)
+			}
+		case d != nil:
+			if d.State == runtime.StateRunning {
+				return nil, fmt.Errorf("RestoreProject: service %q: a running container %q already exists — stop it before restoring", svc.Service, name)
+			}
+			if rerr := e.runtime.RemoveContainer(ctx, name, runtime.RemoveOptions{Force: true}); rerr != nil {
+				return nil, fmt.Errorf("RestoreProject: service %q: clearing stale container %q: %w", svc.Service, name, rerr)
+			}
+		}
+
 		c, err := cr.Restore(ctx, runtime.RestoreSpec{
 			ArchivePath:    filepath.Join(opts.ArchiveDir, svc.Archive),
 			TCPEstablished: opts.TCPEstablished,
+			IgnoreVolumes:  opts.IgnoreVolumes,
 		})
 		if err != nil {
 			return nil, fmt.Errorf("RestoreProject: service %q: %w", svc.Service, err)
diff --git a/checkpoint_project_test.go b/checkpoint_project_test.go
index 1fa0f21..ceb105c 100644
--- a/checkpoint_project_test.go
+++ b/checkpoint_project_test.go
@@ -39,6 +39,13 @@ func (f *fakeProjectRuntime) Capabilities() runtime.Capabilities {
 	return c
 }
 
+// CreateNetwork succeeds: RestoreProject recreates the project network
+// before restoring containers (cross-node fresh-store path), so the
+// checkpoint-capable fake must support it.
+func (f *fakeProjectRuntime) CreateNetwork(ctx context.Context, spec runtime.NetworkSpec) (string, error) {
+	return "net-" + spec.Name, nil
+}
+
 func (f *fakeProjectRuntime) ListContainers(ctx context.Context, filter runtime.LabelFilter) ([]runtime.Container, error) {
 	f.fakeRuntime.mu.Lock()
 	defer f.fakeRuntime.mu.Unlock()
diff --git a/compose/orchestrator.go b/compose/orchestrator.go
index 94c82ab..b1994d4 100644
--- a/compose/orchestrator.go
+++ b/compose/orchestrator.go
@@ -31,6 +31,7 @@ import (
 	"errors"
 	"fmt"
 	"sort"
+	"strings"
 	"sync"
 	"time"
 
@@ -82,15 +83,36 @@ type Orchestrator struct {
 	// PollInterval is the cadence for health polling. Tests
 	// override; production default below.
 	PollInterval time.Duration
+
+	// selfProbe is true when the backend asks the orchestrator to run
+	// healthcheck probes itself (via Exec) instead of configuring the
+	// backend's native HEALTHCHECK — see selfHealthProber and
+	// design/compose-native-health.md. Computed once at construction.
+	selfProbe bool
+}
+
+// selfHealthProber is an optional runtime capability: a backend that
+// implements it and returns true asks the orchestrator to drive
+// healthcheck probes itself rather than relying on the backend's native
+// HEALTHCHECK. Podman implements it — its native healthcheck runs eagerly
+// as root and races privilege-dropping images. Docker and Apple do not,
+// keeping their existing behavior.
+type selfHealthProber interface {
+	PreferSelfProbedHealth() bool
 }
 
 // NewOrchestrator constructs an Orchestrator with sane defaults.
 func NewOrchestrator(rt runtime.Runtime, backendName string) *Orchestrator {
+	selfProbe := false
+	if p, ok := rt.(selfHealthProber); ok {
+		selfProbe = p.PreferSelfProbedHealth()
+	}
 	return &Orchestrator{
 		rt:            rt,
 		BackendName:   backendName,
 		HealthTimeout: DefaultHealthTimeout,
 		PollInterval:  500 * time.Millisecond,
+		selfProbe:     selfProbe,
 	}
 }
 
@@ -481,6 +503,14 @@ func (o *Orchestrator) ensureService(
 	}
 
 	spec := serviceToRunSpec(plan, svc, projectLabels, hash, imageDigest)
+	if o.selfProbe {
+		// The orchestrator probes health itself (see waitFor); explicitly
+		// DISABLE the backend's native HEALTHCHECK. Nil would mean "inherit
+		// from image" (toHealthcheck), so an image-baked HEALTHCHECK would
+		// still run natively — reintroducing Podman's eager root probe that
+		// breaks privilege-dropping images. Disable emits the NONE sentinel.
+		spec.HealthCheck = &runtime.HealthCheckSpec{Disable: true}
+	}
 	c, err := o.rt.RunContainer(ctx, spec)
 	if err != nil {
 		// Compose's `up -d` pulls missing images implicitly. Mirror
@@ -582,7 +612,11 @@ func (o *Orchestrator) gateLevel(
 		if cid == "" {
 			continue
 		}
-		if err := o.waitFor(ctx, svcName, cid, req.condition, deadline); err != nil {
+		var hc *runtime.HealthCheckSpec
+		if svc, ok := plan.Project.Services[svcName]; ok {
+			hc = healthCheckOf(svc.HealthCheck)
+		}
+		if err := o.waitFor(ctx, svcName, cid, req.condition, hc, deadline); err != nil {
 			if req.optional {
 				// Per compose spec: a non-required dependency that
 				// fails to satisfy its condition does not block the
@@ -596,46 +630,63 @@ func (o *Orchestrator) gateLevel(
 	return nil
 }
 
-// waitFor polls a service's condition until satisfied or deadline.
+// waitFor polls a service's condition until satisfied or deadline. When
+// o.selfProbe is set, a service_healthy gate is evaluated by executing the
+// service's healthcheck command (hc) via Exec rather than reading the
+// backend's native health status — see selfHealthProber and
+// design/compose-native-health.md. The completion condition always reads
+// container state (no native healthcheck required either way).
 func (o *Orchestrator) waitFor(
-	ctx context.Context, svc, id, cond string, deadline time.Time,
+	ctx context.Context, svc, id, cond string, hc *runtime.HealthCheckSpec, deadline time.Time,
 ) error {
+	// In self-probe mode, honor start_period as a grace delay before the
+	// first probe so an eager probe can't run before the service inits.
+	probeNotBefore := time.Now()
+	if o.selfProbe && hc != nil {
+		probeNotBefore = probeNotBefore.Add(hc.StartPeriod)
+	}
 	for {
 		if err := ctx.Err(); err != nil {
 			return err
 		}
-		details, err := o.rt.InspectContainer(ctx, id)
-		if err == nil && details != nil {
-			switch cond {
-			case "service_healthy":
-				// Treat HealthNone as satisfied: a container with
-				// no HEALTHCHECK directive can still be a
-				// service_healthy gate target (compose's behavior),
-				// so falling back to State=Running keeps that case
-				// working. For services that DO declare a
-				// healthcheck, require Healthy explicitly.
-				switch details.Health {
-				case runtime.HealthHealthy:
-					return nil
-				case runtime.HealthNone:
-					if details.State == runtime.StateRunning {
+		if o.selfProbe && cond == "service_healthy" {
+			if o.probeHealthy(ctx, id, hc, probeNotBefore) {
+				return nil
+			}
+		} else {
+			details, err := o.rt.InspectContainer(ctx, id)
+			if err == nil && details != nil {
+				switch cond {
+				case "service_healthy":
+					// Treat HealthNone as satisfied: a container with
+					// no HEALTHCHECK directive can still be a
+					// service_healthy gate target (compose's behavior),
+					// so falling back to State=Running keeps that case
+					// working. For services that DO declare a
+					// healthcheck, require Healthy explicitly.
+					switch details.Health {
+					case runtime.HealthHealthy:
 						return nil
+					case runtime.HealthNone:
+						if details.State == runtime.StateRunning {
+							return nil
+						}
+					case runtime.HealthUnhealthy:
+						return fmt.Errorf(
+							"compose: service %q reported unhealthy while waiting on service_healthy",
+							svc,
+						)
+					}
+				case "service_completed_successfully":
+					if details.State == runtime.StateExited && details.ExitCode == 0 {
+						return nil
+					}
+					if details.State == runtime.StateExited && details.ExitCode != 0 {
+						return fmt.Errorf(
+							"compose: %s exited with code %d while waiting for completion",
+							svc, details.ExitCode,
+						)
 					}
-				case runtime.HealthUnhealthy:
-					return fmt.Errorf(
-						"compose: service %q reported unhealthy while waiting on service_healthy",
-						svc,
-					)
-				}
-			case "service_completed_successfully":
-				if details.State == runtime.StateExited && details.ExitCode == 0 {
-					return nil
-				}
-				if details.State == runtime.StateExited && details.ExitCode != 0 {
-					return fmt.Errorf(
-						"compose: %s exited with code %d while waiting for completion",
-						svc, details.ExitCode,
-					)
 				}
 			}
 		}
@@ -654,6 +705,59 @@ func (o *Orchestrator) waitFor(
 	}
 }
 
+// probeHealthy runs the service's compose healthcheck once via Exec and
+// reports whether it currently passes. A nil/disabled/NONE/empty
+// healthcheck falls back to "is the container running?", mirroring the
+// native path's HealthNone behavior. During the start_period grace
+// (before notBefore) it reports not-healthy without probing, so the first
+// probe is deferred until the service has had time to initialize.
+func (o *Orchestrator) probeHealthy(
+	ctx context.Context, id string, hc *runtime.HealthCheckSpec, notBefore time.Time,
+) bool {
+	cmd := healthProbeCmd(hc)
+	if cmd == nil {
+		details, err := o.rt.InspectContainer(ctx, id)
+		return err == nil && details != nil && details.State == runtime.StateRunning
+	}
+	if time.Now().Before(notBefore) {
+		return false
+	}
+	probeCtx := ctx
+	if hc.Timeout > 0 {
+		var cancel context.CancelFunc
+		probeCtx, cancel = context.WithTimeout(ctx, hc.Timeout)
+		defer cancel()
+	}
+	res, err := o.rt.ExecContainer(probeCtx, id, runtime.ExecOptions{Cmd: cmd})
+	return err == nil && res.ExitCode == 0
+}
+
+// healthProbeCmd converts a compose-normalized healthcheck Test into an
+// Exec command, or nil for NONE / disabled / empty. Compose normalizes
+// Test to a CMD / CMD-SHELL / NONE leading token; the default branch
+// shell-runs a bare-string form defensively.
+func healthProbeCmd(hc *runtime.HealthCheckSpec) []string {
+	if hc == nil || hc.Disable || len(hc.Test) == 0 {
+		return nil
+	}
+	switch hc.Test[0] {
+	case "NONE":
+		return nil
+	case "CMD":
+		if len(hc.Test) < 2 {
+			return nil
+		}
+		return append([]string(nil), hc.Test[1:]...)
+	case "CMD-SHELL":
+		if len(hc.Test) < 2 {
+			return nil
+		}
+		return []string{"/bin/sh", "-c", hc.Test[1]}
+	default:
+		return []string{"/bin/sh", "-c", strings.Join(hc.Test, " ")}
+	}
+}
+
 // serviceToRunSpec is the in-memory translation from compose's
 // ServiceConfig to runtime.RunSpec. This is intentionally minimal
 // for C6 — env / labels / mounts / command / entrypoint / user /
diff --git a/compose/orchestrator_health_test.go b/compose/orchestrator_health_test.go
new file mode 100644
index 0000000..9c896f9
--- /dev/null
+++ b/compose/orchestrator_health_test.go
@@ -0,0 +1,139 @@
+package compose
+
+import (
+	"context"
+	"errors"
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/crunchloop/devcontainer/runtime"
+)
+
+func TestHealthProbeCmd(t *testing.T) {
+	tests := []struct {
+		name string
+		hc   *runtime.HealthCheckSpec
+		want []string
+	}{
+		{"nil", nil, nil},
+		{"disabled", &runtime.HealthCheckSpec{Test: []string{"CMD", "true"}, Disable: true}, nil},
+		{"empty", &runtime.HealthCheckSpec{Test: nil}, nil},
+		{"none", &runtime.HealthCheckSpec{Test: []string{"NONE"}}, nil},
+		{"cmd", &runtime.HealthCheckSpec{Test: []string{"CMD", "rabbitmq-diagnostics", "-q", "ping"}}, []string{"rabbitmq-diagnostics", "-q", "ping"}},
+		{"cmd-no-args", &runtime.HealthCheckSpec{Test: []string{"CMD"}}, nil},
+		{"cmd-shell", &runtime.HealthCheckSpec{Test: []string{"CMD-SHELL", "pg_isready -U postgres"}}, []string{"/bin/sh", "-c", "pg_isready -U postgres"}},
+		{"bare-string-defensive", &runtime.HealthCheckSpec{Test: []string{"curl", "-f", "localhost"}}, []string{"/bin/sh", "-c", "curl -f localhost"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := healthProbeCmd(tt.hc)
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Fatalf("healthProbeCmd(%v) = %v, want %v", tt.hc, got, tt.want)
+			}
+		})
+	}
+}
+
+// selfProbingMock is a mockRuntime that opts into orchestrator-driven
+// health probing, exercising the selfHealthProber detection.
+type selfProbingMock struct{ *mockRuntime }
+
+func (selfProbingMock) PreferSelfProbedHealth() bool { return true }
+
+func TestNewOrchestratorDetectsSelfProbe(t *testing.T) {
+	if NewOrchestrator(newMockRuntime(), "docker").selfProbe {
+		t.Error("docker mock (no selfHealthProber) should not self-probe")
+	}
+	if !NewOrchestrator(selfProbingMock{newMockRuntime()}, "podman").selfProbe {
+		t.Error("podman-like mock should self-probe")
+	}
+}
+
+// In self-probe mode the RunSpec must DISABLE the native healthcheck, not
+// leave it nil — nil means "inherit the image's HEALTHCHECK", which would
+// let Podman run an image-baked probe (as root, eagerly) and reintroduce
+// the very failure self-probing exists to avoid.
+func TestUp_SelfProbeDisablesNativeHealthcheck(t *testing.T) {
+	rt := newMockRuntime()
+	orch := NewOrchestrator(selfProbingMock{rt}, "podman")
+	proj := newProject(t, map[string][]string{"app": nil})
+
+	var got *runtime.HealthCheckSpec
+	rt.OnRunContainer = func(spec runtime.RunSpec) (*runtime.Container, error) {
+		got = spec.HealthCheck
+		return nil, nil
+	}
+	if _, err := orch.Up(context.Background(), &Plan{Project: proj, ProjectName: "dc-x"}); err != nil {
+		t.Fatalf("Up: %v", err)
+	}
+	if got == nil || !got.Disable {
+		t.Fatalf("self-probe must set HealthCheck.Disable=true (got %+v); nil would inherit the image healthcheck", got)
+	}
+}
+
+func TestWaitForSelfProbeHealthy(t *testing.T) {
+	rt := newMockRuntime()
+	var execed int
+	rt.OnExec = func(id string, opts runtime.ExecOptions) (runtime.ExecResult, error) {
+		execed++
+		// Succeed only on the 2nd probe — proves we poll the command,
+		// not the native health status.
+		if execed >= 2 {
+			return runtime.ExecResult{ExitCode: 0}, nil
+		}
+		return runtime.ExecResult{ExitCode: 1}, nil
+	}
+	orch := NewOrchestrator(selfProbingMock{rt}, "podman")
+	orch.PollInterval = time.Millisecond
+
+	hc := &runtime.HealthCheckSpec{Test: []string{"CMD", "rabbitmq-diagnostics", "-q", "ping"}}
+	err := orch.waitFor(context.Background(), "rabbitmq", "c1", "service_healthy", hc, time.Now().Add(2*time.Second))
+	if err != nil {
+		t.Fatalf("waitFor = %v, want nil (probe should pass on 2nd try)", err)
+	}
+	if execed < 2 {
+		t.Fatalf("expected >= 2 probe execs, got %d", execed)
+	}
+}
+
+func TestWaitForSelfProbeTimeout(t *testing.T) {
+	rt := newMockRuntime()
+	rt.OnExec = func(id string, opts runtime.ExecOptions) (runtime.ExecResult, error) {
+		return runtime.ExecResult{ExitCode: 1}, nil // never healthy
+	}
+	orch := NewOrchestrator(selfProbingMock{rt}, "podman")
+	orch.PollInterval = time.Millisecond
+	orch.HealthTimeout = 30 * time.Millisecond
+
+	hc := &runtime.HealthCheckSpec{Test: []string{"CMD", "false"}}
+	err := orch.waitFor(context.Background(), "svc", "c1", "service_healthy", hc, time.Now().Add(30*time.Millisecond))
+	var hte *HealthTimeoutError
+	if !errors.As(err, &hte) {
+		t.Fatalf("waitFor = %v, want *HealthTimeoutError", err)
+	}
+}
+
+// A service with no/NONE healthcheck, self-probe mode: gate is satisfied
+// by the container merely running (mirrors native HealthNone behavior).
+func TestWaitForSelfProbeNoHealthcheckRunning(t *testing.T) {
+	rt := newMockRuntime()
+	c, err := rt.RunContainer(context.Background(), runtime.RunSpec{Name: "svc"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := rt.StartContainer(context.Background(), c.ID); err != nil {
+		t.Fatal(err)
+	}
+	rt.OnExec = func(id string, opts runtime.ExecOptions) (runtime.ExecResult, error) {
+		t.Fatal("should not exec a probe when there is no healthcheck")
+		return runtime.ExecResult{}, nil
+	}
+	orch := NewOrchestrator(selfProbingMock{rt}, "podman")
+	orch.PollInterval = time.Millisecond
+
+	err = orch.waitFor(context.Background(), "svc", c.ID, "service_healthy", nil, time.Now().Add(time.Second))
+	if err != nil {
+		t.Fatalf("waitFor = %v, want nil (running container, no healthcheck)", err)
+	}
+}
diff --git a/compose/orchestrator_test.go b/compose/orchestrator_test.go
index 1593c66..4e1321e 100644
--- a/compose/orchestrator_test.go
+++ b/compose/orchestrator_test.go
@@ -45,6 +45,7 @@ type mockRuntime struct {
 	// Hooks (set to override default behavior).
 	OnRunContainer func(spec runtime.RunSpec) (*runtime.Container, error)
 	OnInspect      func(id string, base *runtime.ContainerDetails) *runtime.ContainerDetails
+	OnExec         func(id string, opts runtime.ExecOptions) (runtime.ExecResult, error)
 
 	// imageDigest, when non-empty, is the digest InspectImage
 	// returns for every reference. Tests that exercise digest-drift
@@ -139,6 +140,9 @@ func (m *mockRuntime) RemoveContainer(ctx context.Context, id string, opts runti
 }
 
 func (m *mockRuntime) ExecContainer(ctx context.Context, id string, opts runtime.ExecOptions) (runtime.ExecResult, error) {
+	if m.OnExec != nil {
+		return m.OnExec(id, opts)
+	}
 	return runtime.ExecResult{}, nil
 }
 
diff --git a/design/checkpoint-restore.md b/design/checkpoint-restore.md
index 3820554..6c82287 100644
--- a/design/checkpoint-restore.md
+++ b/design/checkpoint-restore.md
@@ -242,13 +242,46 @@ Model notes (validated by the Phase-0 spike, §7):
 - **Network:** the shared network re-forms as containers come back
   (`restore --import` re-attaches networking, and Podman restores the
   original container name, so service-name DNS resolves again). The network
-  must still exist on the target; recreating it cross-node is the
-  orchestrator's caller's job (or a follow-up).
+  must exist on the target *before* restore, or libpod fails with `network
+  not found`. **`RestoreProject` now recreates `<project>_default` itself**
+  (idempotent, mirroring `compose.Orchestrator.Up`'s name + labels) so a
+  cross-node restore onto a fresh store works. Only the default network is
+  recreated; custom/extra compose networks aren't recorded in the manifest
+  yet — a follow-up records the network set at checkpoint time.
+- **Replace, don't duplicate:** `restore --import` always creates a *new*
+  container under the archived (deterministic `<project>-<service>-1`)
+  name. `CheckpointProject(StopAfter)` stops but does not remove, so a
+  same-node restore-in-place collides on that name (`ID already in use`).
+  `RestoreProject` clears it first, but only when **not running** (its
+  state is in the archive; `RemoveVolumes` stays false so the data volume
+  survives) — a *running* same-named container is a genuine conflict and
+  restore errors rather than killing a live service. No-op cross-node.
+- **Volumes:** restore re-extracts volume content from the archive by
+  default (correct cross-node, where the destination has none). For a
+  same-node restore-in-place the volumes still exist and re-extraction
+  collides (`volume already exists`); `RestoreSpec.IgnoreVolumes`
+  (surfaced as `ProjectRestoreOptions`/`RestoreOptions.IgnoreVolumes` →
+  Podman `ignoreVolumes=true`) reuses the existing volume instead.
 - **Completeness:** the manifest is written last, so its presence implies a
   complete set; a partial checkpoint leaves no manifest and `RestoreProject`
   fails cleanly.
 - **Scale:** one container per service in v1 (no compose `scale`).
 
+Two **node prerequisites** the multi-service validation surfaced (both
+node-provisioning, not library code — the engine talks to a remote socket):
+
+- **CRIU `skip-file-rwx-check`** in `/etc/criu/default.conf` on every node
+  that restores. Some images set their data dir to `0700` (Postgres
+  requires it) and CRIU records the process cwd mode at checkpoint; the
+  restored volume mountpoint is `0755`, so CRIU's file-mode consistency
+  check rejects it. Podman exposes no per-restore flag for it (5.7.0), and
+  the content is correct — only the mountpoint mode differs — so the CRIU
+  config switch is the fix, alongside installing CRIU itself.
+- **Base images present on the destination.** A project restore is *not*
+  fully self-contained: Podman pulls each service's base image if absent.
+  A cross-node destination needs the images cached or registry credentials
+  (in practice it already runs the same workloads).
+
 ## 4. Capability gate
 
 Add one field to the `Capabilities` struct
@@ -362,13 +395,26 @@ What's **proven** (2026-06-19, bench pod, real image):
   peer-IP change on restore is the residual edge (matches the
   "agents reconnect" assumption).
 
+- ✅ **Real multi-service devcontainer, full project round trip + true
+  cross-node migration (2026-06-23).** An 8-service compose devcontainer
+  (Postgres, RabbitMQ, MinIO, an OAuth mock, an app sidecar, …) checkpointed
+  on node A and `RestoreProject`'d on node B — a separate, never-ran-this
+  Podman store. All 8 services resumed and in-memory/disk state survived the
+  move (a file written in the app container and a row written in Postgres
+  pre-checkpoint were present post-restore on B). Exercised the §3.3
+  decisions (network recreation, replace-guard, volume handling) and both
+  node prerequisites above. Timings: checkpoint ~9–17s, restore ~6–7s
+  cross-node / ~5s same-node-reuse for a ~580 MB archive set; storage speed
+  (local vs shared mount) and whether volume content is re-extracted are the
+  dominant variables.
+
 What's **still open** (minor):
 
-- **Working-set timing.** Idle container was fast (~0.5s same-pod, ~3.5s
-  cross-pod incl. rootfs unpack); a busy agent's memory footprint sets the
-  real checkpoint time vs the eviction window. Measure on a real workload.
-- **Forced cross-node** placement (anti-affinity) — belt-and-suspenders;
-  the archive is already proven node-independent.
+- **Working-set timing on a busy agent.** The validations above were on
+  freshly-started services; a saturated agent's memory footprint still sets
+  the real checkpoint time vs the eviction window — measure under load.
+- **Forced cross-node** placement (anti-affinity) — now exercised across two
+  isolated Podman stores (2026-06-23); the archive is proven node-independent.
 
 ## 8. Constraints, risks & mitigations
 
diff --git a/design/compose-native-health.md b/design/compose-native-health.md
new file mode 100644
index 0000000..0f272a1
--- /dev/null
+++ b/design/compose-native-health.md
@@ -0,0 +1,166 @@
+# Design — Orchestrator-driven health probing (Podman)
+
+**Status:** Draft for review
+**Date:** 2026-06-23
+**Scope:** Make the native compose orchestrator (`compose/orchestrator.go`)
+run healthcheck probes itself via `Exec` on backends whose native
+HEALTHCHECK is unsafe, instead of configuring the backend's native
+healthcheck. Fixes RabbitMQ (and any privilege-dropping image) failing to
+boot under the Podman backend.
+
+Companion to `design/compose-native.md` (the orchestrator) and
+`design/podman-backend.md` (the Podman runtime).
+
+---
+
+## 1. Problem
+
+A `rabbitmq` service in a multi-service compose devcontainer exits 1 under
+the Podman backend:
+
+```
+Error when reading /var/lib/rabbitmq/.erlang.cookie: eacces
+```
+
+Root cause (reproduced deterministically, incl. bare `podman run
+--health-cmd ...` — i.e. a Podman behavior, not a bug in our translation):
+
+1. The rabbitmq image runs as root; its entrypoint `gosu`-drops
+   `rabbitmq-server` to uid 999.
+2. **Podman runs the container HEALTHCHECK as root and fires the first
+   probe ~1-2s after start** (it does *not* defer the first probe for
+   `start_period`).
+3. rabbitmq's healthcheck `rabbitmq-diagnostics -q ping` is an Erlang
+   escript. Run as root with `HOME=/var/lib/rabbitmq` and no cookie yet,
+   it **creates `/var/lib/rabbitmq/.erlang.cookie` owned root:root,
+   mode 400**.
+4. The uid-999 server then cannot read root's cookie → `eacces` → exit 1.
+
+Docker is unaffected because it defers the first probe past the interval,
+so the 999 server writes the `999:999` cookie first; a later root probe
+just reads it (root bypasses DAC). Only Erlang-cookie-style images are hit
+— `pg_isready` / `curl` probes don't create root-owned files the main
+process must read.
+
+Evidence: no healthcheck → works; `CMD true` healthcheck → works; real
+healthcheck (even with `start_period: 120s`) → fails; first probe deferred
+until after server init → works.
+
+## 2. Decision
+
+On backends that opt in, the orchestrator **owns health probing**:
+
+- **Do not** set `RunSpec.HealthCheck` (the backend never runs a native
+  HEALTHCHECK, so no eager root probe).
+- For `depends_on.<svc>.condition: service_healthy` gates, the orchestrator
+  runs the service's compose `test` command itself via `ExecContainer`,
+  polling until it exits 0 (healthy) or `HealthTimeout` (→
+  `HealthTimeoutError`).
+
+Because probing is driven by the gate loop — which runs only *after* the
+level's containers have reached running — the first probe lands after the
+service has begun initializing, mirroring Docker's deferred-first-probe
+behavior. `start_period` is honored as a grace delay before the first
+probe.
+
+Services that declare a healthcheck but are **not** gated by any dependent
+(e.g. rabbitmq, when the dependent depends on it with the list/`service_started`
+form) are simply never probed — they run without a health status, which is
+correct: nothing consumes it, and the eager-root-probe crash is gone.
+
+### 2.1 Backend opt-in
+
+A backend signals the preference through an optional, consumer-defined
+interface (Go idiom — the orchestrator declares what it needs):
+
+```go
+// in compose/
+type selfHealthProber interface { PreferSelfProbedHealth() bool }
+```
+
+`runtime/podman.Runtime` implements it returning `true`. Docker and
+Apple do not implement it → unchanged behavior (Docker keeps native
+healthchecks + `InspectContainer().Health` gating; Apple keeps its
+`Healthchecks: false` fallback).
+
+Rejected alternatives:
+- A `runtime.Capabilities` bit — `Capabilities` models compose *feature*
+  support; "native healthcheck runs eagerly as root" is a backend quirk,
+  not a feature. Keeping it out of `Capabilities` avoids conflating the two.
+- Type-asserting `*podman.Runtime` in the engine — couples the engine to a
+  concrete backend and risks an import cycle.
+
+### 2.2 Probe semantics
+
+- `test` is compose-normalized to `["CMD", arg...]`, `["CMD-SHELL", str]`,
+  or `["NONE"]`. CMD → `ExecOptions.Cmd = test[1:]`; CMD-SHELL →
+  `["/bin/sh", "-c", test[1]]`; NONE / empty / `Disable` → treated as "no
+  healthcheck" → fall back to `State == Running` (same as today's
+  `HealthNone`).
+- Exec runs as the container's default user (`ExecOptions.User=""`). Safe
+  because probing is deferred: by the time we probe, the main process has
+  initialized. (Residual edge: a *gated* Erlang-cookie service with no
+  `start_period` could still be probed before its server writes the cookie;
+  `start_period` closes that. The motivating project does not hit this —
+  its rabbitmq is not gated.)
+- Exit 0 → healthy → gate satisfied. Non-zero → keep polling until deadline.
+
+## 3. Scope of change
+
+- `runtime/podman/podman.go`: add `PreferSelfProbedHealth() bool`.
+- `compose/orchestrator.go`: detect opt-in; omit `HealthCheck` from the
+  RunSpec when opted in; in `waitFor`, self-probe `service_healthy` gates
+  via Exec using the service's healthcheck spec.
+- Tests: unit (test→exec translation; self-probe gate satisfied/timeout via
+  a fake runtime). Validated manually end-to-end against a real
+  multi-service compose devcontainer on Podman (OrbStack) — rabbitmq boots
+  clean; a Podman integration test is follow-up work.
+
+No change to the Docker or Apple paths.
+
+## 3.1 Two coexisting health-check paths (the divergence)
+
+This change introduces a **second** health mechanism rather than replacing
+the first, so the orchestrator now has two, selected per-backend by
+`selfProbe`:
+
+| | Native path (Docker, Apple) | Self-probe path (Podman) |
+|---|---|---|
+| Healthcheck on container | `RunSpec.HealthCheck` set; runtime runs it | omitted |
+| Who probes | the runtime, as the container's user | the orchestrator, via `ExecContainer` |
+| `service_healthy` gate reads | `InspectContainer().Health` | exit code of the exec'd `test` |
+| Selected when | `selfProbe == false` | `selfProbe == true` |
+
+**Behavioral divergence to be aware of**, beyond two code paths to
+maintain: on the native path *every* service with a `healthcheck:` gets a
+persistent health status surfaced by the runtime (`podman ps` / `docker ps`
+HEALTH column). On the self-probe path the native healthcheck is omitted
+entirely, so (a) only services actually gated by a `service_healthy`
+dependency are ever probed, and (b) the probe result is transient — used to
+satisfy the gate, never surfaced as the container's reported health. For
+the motivating project this is invisible (nothing consumes rabbitmq's
+health; a `service → postgres` `service_healthy` gate still works via the
+self-probe), but the HEALTH column differs
+between backends.
+
+This fork is deliberate — Podman's eager-root healthcheck forced it — not
+accidental. Convergence options, to revisit when the `compose-native`
+rollout flips the default backend (`design/compose-native.md` §10):
+1. Unify on self-probe for all backends — one path, Docker parity, but lose
+   the runtime's native HEALTH surface.
+2. Keep the fork but also set a native healthcheck for non-gated services on
+   the self-probe path where safe — narrows the divergence, more complex.
+3. Leave as-is, documented (current choice).
+
+## 4. Out of scope — rootless Podman + gosu
+
+A *distinct* failure exists for the same image class under **rootless**
+Podman: `gosu`'s setuid/setgid privilege drop is denied by the user
+namespace (containers/podman#6816), so `rabbitmq-server` can't drop to uid
+999 at all. That is a different mechanism from the healthcheck race fixed
+here and this change does **not** address it. Our validation is on
+**rootful** Podman (the C/R target — root socket), where gosu works; if a
+rootless deployment is ever targeted, rabbitmq needs a separate remedy
+(e.g. `user:` pinning + uid-mapped volumes). Confirmed via #20893 that
+`--health-start-period` is doc-only and never defers the first probe, so it
+is not a viable alternative.
diff --git a/runtime/podman/checkpoint.go b/runtime/podman/checkpoint.go
index 63120d2..ecbb638 100644
--- a/runtime/podman/checkpoint.go
+++ b/runtime/podman/checkpoint.go
@@ -75,6 +75,9 @@ func (r *Runtime) Restore(ctx context.Context, spec runtime.RestoreSpec) (*runti
 	q := url.Values{}
 	q.Set("import", "true")
 	q.Set("tcpestablished", strconv.FormatBool(spec.TCPEstablished))
+	if spec.IgnoreVolumes {
+		q.Set("ignoreVolumes", "true")
+	}
 	if spec.Name != "" {
 		q.Set("name", spec.Name)
 	}
diff --git a/runtime/podman/podman.go b/runtime/podman/podman.go
index 004b7d1..c6d9433 100644
--- a/runtime/podman/podman.go
+++ b/runtime/podman/podman.go
@@ -119,3 +119,15 @@ func (r *Runtime) Capabilities() runtime.Capabilities {
 		Checkpoint:       r.checkpointOK,
 	}
 }
+
+// PreferSelfProbedHealth tells the compose orchestrator to run healthcheck
+// probes itself (via Exec) rather than configuring Podman's native
+// HEALTHCHECK. Podman runs the native healthcheck as root and fires the
+// first probe immediately at container start — before the main process
+// initializes — which breaks privilege-dropping images: e.g. rabbitmq's
+// `rabbitmq-diagnostics` probe, run as root, creates a root-owned
+// /var/lib/rabbitmq/.erlang.cookie that the gosu-dropped (uid 999) server
+// then cannot read (eacces). Letting the orchestrator probe defers the
+// first check until after the service is up, matching Docker's behavior.
+// See design/compose-native-health.md.
+func (r *Runtime) PreferSelfProbedHealth() bool { return true }
diff --git a/runtime/runtime.go b/runtime/runtime.go
index 0e47673..7a69fdf 100644
--- a/runtime/runtime.go
+++ b/runtime/runtime.go
@@ -148,6 +148,14 @@ type RestoreSpec struct {
 	// TCPEstablished must match the checkpoint when the archive captured
 	// established connections.
 	TCPEstablished bool
+
+	// IgnoreVolumes asks the backend to skip restoring volume content
+	// from the archive, reusing whatever volume already exists. For
+	// Podman this maps to restore's ignore-volumes; cross-node restore
+	// leaves it false (content must come from the archive), same-node
+	// restore-in-place sets it true to avoid a "volume already exists"
+	// collision.
+	IgnoreVolumes bool
 }
 
 // CheckpointRef describes a written checkpoint archive.