From 400f1febc2d0854d78e7027279ef903bfebf5eaf Mon Sep 17 00:00:00 2001
From: bilby91 <mfernandez@crunchloop.io>
Date: Sat, 16 May 2026 12:50:48 -0300
Subject: [PATCH] runtime: add per-container memory + CPU limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds RunSpec.MemoryBytes (int64) and RunSpec.NanoCPUs (int64) so
callers can size each container's resources. Docker enforces via
cgroups (HostConfig.Memory + NanoCPUs); apple enforces by sizing the
per-container VM at boot (ContainerConfiguration.resources.cpus +
memoryInBytes). Apple's apiserver takes integer CPU counts, so the
bridge rounds fractional nano-cpus up to the next whole CPU.

Compose orchestrator reads from deploy.resources.limits (memory +
cpus) with a fallback to the legacy top-level mem_limit / cpus, which
matches docker compose's own precedence.

Loosens the §2.2 refusal of deploy: — we now accept deploy when it
only carries resources.limits with memory/cpus. Anything else inside
deploy (replicas, mode, placement, update_config, rollback_config,
restart_policy, endpoint_mode, labels, reservations, pids, devices,
generic_resources) keeps its specific typed refusal so users see what
to drop. Updates the two existing tests that used an empty Deploy{}
as a refusal trigger to use Deploy{Mode: "global"} instead.

Motivation: bringing up the dap monorepo on the applecontainer
backend, pnpm install on the app service got OOM-killed at apple's
1 GiB per-VM default. Without RunSpec resource fields, callers had no
way to raise the limit through this library.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../Sources/ACBridge/lifecycle.swift          |  25 +++++
 compose/orchestrator.go                       |  26 +++++
 compose/orchestrator_test.go                  |  86 ++++++++++++++-
 compose/plan.go                               | 104 +++++++++++++++++-
 compose/plan_test.go                          |   2 +-
 .../applecontainer/lifecycle_darwin_arm64.go  |   4 +
 runtime/docker/run.go                         |   4 +
 runtime/runtime.go                            |  20 ++++
 8 files changed, 265 insertions(+), 6 deletions(-)

diff --git a/applecontainer-bridge/Sources/ACBridge/lifecycle.swift b/applecontainer-bridge/Sources/ACBridge/lifecycle.swift
index ec58f10..97f25d5 100644
--- a/applecontainer-bridge/Sources/ACBridge/lifecycle.swift
+++ b/applecontainer-bridge/Sources/ACBridge/lifecycle.swift
@@ -30,6 +30,14 @@ private struct RunSpecJSON: Decodable {
     var initProcess: Bool?
     var capAdd: [String]?
     var overrideCommand: Bool?
+    // Hard memory limit for the per-container VM, in bytes. Zero or
+    // absent leaves apple's default (1 GiB on 0.12.x) in place.
+    var memoryBytes: Int64?
+    // CPU limit in nano-units (1_000_000_000 = 1 CPU). Apple's
+    // apiserver takes an integer CPU count, so the bridge rounds up
+    // to the next whole CPU. Zero or absent leaves apple's default (4)
+    // in place.
+    var nanoCPUs: Int64?
 }
 
 private struct MountJSON: Decodable {
@@ -106,6 +114,23 @@ private func runContainer(spec: RunSpecJSON) async throws {
     cfg.mounts = try (spec.mounts ?? []).map(toFilesystem)
     cfg.capAdd = spec.capAdd ?? []
     cfg.useInit = spec.initProcess ?? false
+    // Resource limits. Apply only when caller specified a value;
+    // leave apple's Resources defaults (4 cpus / 1 GiB) untouched
+    // otherwise. Zero and negative inputs are ignored (treated as
+    // unset) here; the Go side rejects negatives earlier too.
+    if let mem = spec.memoryBytes, mem > 0 {
+        cfg.resources.memoryInBytes = UInt64(mem)
+    }
+    if let nano = spec.nanoCPUs, nano > 0 {
+        // Round up to the next whole CPU. NanoCPUs of 1_500_000_000
+        // (1.5 cpus) → cpus = 2. Apple's apiserver doesn't model
+        // fractional CPU shares; callers expressing a fractional
+        // limit get the next whole CPU rather than a silent floor.
+        // Ceil as (nano - 1) / 1e9 + 1 instead of adding 999_999_999
+        // first: Swift traps on Int64 overflow, so a huge decoded
+        // value must not crash the bridge. nano > 0 ⇒ result >= 1.
+        cfg.resources.cpus = Int((nano - 1) / 1_000_000_000 + 1)
+    }
     // Enable Rosetta when running an amd64 container on an arm64
     // host. Without this flag the apiserver rejects amd64 containers
     // with "unsupported: platform linux/amd64". Mirrors
diff --git a/compose/orchestrator.go b/compose/orchestrator.go
index ab5fac7..2c0fd8e 100644
--- a/compose/orchestrator.go
+++ b/compose/orchestrator.go
@@ -692,6 +692,7 @@ func serviceToRunSpec(
 		})
 	}
 
+	memBytes, nanoCPUs := resourcesOf(svc)
 	return runtime.RunSpec{
 		Image:         svc.Image,
 		Name:          plan.ProjectName + "-" + svc.Name + "-1",
@@ -708,9 +709,34 @@ func serviceToRunSpec(
 		HealthCheck:   healthCheckOf(svc.HealthCheck),
 		Init:          svc.Init != nil && *svc.Init,
 		CapAdd:        svc.CapAdd,
+		MemoryBytes:   memBytes,
+		NanoCPUs:      nanoCPUs,
 	}
 }
 
+// resourcesOf extracts the memory + CPU limits from a compose service.
+// deploy.resources.limits (compose v3+) wins over the legacy top-level
+// mem_limit / cpus fields when both are set, matching docker compose's
+// own precedence. Zero values mean "unset" — the backend's default
+// applies.
+func resourcesOf(svc composetypes.ServiceConfig) (memBytes, nanoCPUs int64) {
+	if d := svc.Deploy; d != nil {
+		if lim := d.Resources.Limits; lim != nil {
+			memBytes = int64(lim.MemoryBytes)
+			if cpus := lim.NanoCPUs.Value(); cpus > 0 {
+				nanoCPUs = int64(cpus * 1_000_000_000)
+			}
+		}
+	}
+	if memBytes == 0 {
+		memBytes = int64(svc.MemLimit)
+	}
+	if nanoCPUs == 0 && svc.CPUS > 0 {
+		nanoCPUs = int64(svc.CPUS * 1_000_000_000)
+	}
+	return memBytes, nanoCPUs
+}
+
 // healthCheckOf translates compose's HealthCheckConfig pointer into
 // our runtime-neutral spec. Returns nil if the service didn't
 // declare one (image's HEALTHCHECK applies as-is).
diff --git a/compose/orchestrator_test.go b/compose/orchestrator_test.go
index d6659c4..dcbbb2c 100644
--- a/compose/orchestrator_test.go
+++ b/compose/orchestrator_test.go
@@ -521,7 +521,7 @@ func TestUp_RefusesUnsupportedFields(t *testing.T) {
 	orch := NewOrchestrator(rt, "docker")
 	proj := &composetypes.Project{
 		Services: composetypes.Services{
-			"app": composetypes.ServiceConfig{Name: "app", Image: "alpine", Deploy: &composetypes.DeployConfig{}},
+			"app": composetypes.ServiceConfig{Name: "app", Image: "alpine", Deploy: &composetypes.DeployConfig{Mode: "global"}},
 		},
 	}
 	_, err := orch.Up(context.Background(), &Plan{Project: proj, ProjectName: "dc-x"})
@@ -695,6 +695,90 @@ func TestUp_AnonymousVolumesFlowThrough(t *testing.T) {
 	}
 }
 
+// TestUp_ResourceLimitsTranslate pins the compose-to-RunSpec mapping
+// for memory and CPU limits, including the deploy.resources.limits >
+// legacy mem_limit/cpus precedence. Backends translate from RunSpec;
+// this test pins the orchestrator side.
+func TestUp_ResourceLimitsTranslate(t *testing.T) {
+	cases := []struct {
+		name     string
+		mut      func(*composetypes.ServiceConfig)
+		wantMem  int64
+		wantNano int64
+	}{
+		{
+			name: "deploy_limits",
+			mut: func(s *composetypes.ServiceConfig) {
+				s.Deploy = &composetypes.DeployConfig{
+					Resources: composetypes.Resources{
+						Limits: &composetypes.Resource{
+							MemoryBytes: composetypes.UnitBytes(2 * 1024 * 1024 * 1024),
+							NanoCPUs:    composetypes.NanoCPUs(2.5),
+						},
+					},
+				}
+			},
+			wantMem:  2 * 1024 * 1024 * 1024,
+			wantNano: 2_500_000_000,
+		},
+		{
+			name: "legacy_only",
+			mut: func(s *composetypes.ServiceConfig) {
+				s.MemLimit = composetypes.UnitBytes(512 * 1024 * 1024)
+				s.CPUS = 1.5
+			},
+			wantMem:  512 * 1024 * 1024,
+			wantNano: 1_500_000_000,
+		},
+		{
+			name: "deploy_overrides_legacy",
+			mut: func(s *composetypes.ServiceConfig) {
+				s.MemLimit = composetypes.UnitBytes(128 * 1024 * 1024)
+				s.CPUS = 1.0
+				s.Deploy = &composetypes.DeployConfig{
+					Resources: composetypes.Resources{
+						Limits: &composetypes.Resource{
+							MemoryBytes: composetypes.UnitBytes(4 * 1024 * 1024 * 1024),
+							NanoCPUs:    composetypes.NanoCPUs(4),
+						},
+					},
+				}
+			},
+			wantMem:  4 * 1024 * 1024 * 1024,
+			wantNano: 4_000_000_000,
+		},
+		{
+			name:     "unset",
+			mut:      func(*composetypes.ServiceConfig) {},
+			wantMem:  0,
+			wantNano: 0,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rt := newMockRuntime()
+			var seen runtime.RunSpec
+			rt.OnRunContainer = func(spec runtime.RunSpec) (*runtime.Container, error) {
+				seen = spec
+				return nil, nil
+			}
+			orch := NewOrchestrator(rt, "docker")
+			svc := composetypes.ServiceConfig{Name: "app", Image: "alpine"}
+			tc.mut(&svc)
+			proj := &composetypes.Project{Services: composetypes.Services{"app": svc}}
+			if _, err := orch.Up(context.Background(), &Plan{Project: proj, ProjectName: "dc-x"}); err != nil {
+				t.Fatalf("Up: %v", err)
+			}
+			if seen.MemoryBytes != tc.wantMem {
+				t.Errorf("MemoryBytes = %d, want %d", seen.MemoryBytes, tc.wantMem)
+			}
+			if seen.NanoCPUs != tc.wantNano {
+				t.Errorf("NanoCPUs = %d, want %d", seen.NanoCPUs, tc.wantNano)
+			}
+		})
+	}
+}
+
 // TestDown_RemovesProjectNetwork pins the network-cleanup contract.
 // Up creates <project>_default; Down must call RemoveNetwork on it
 // after containers are gone. Without this, every devcontainer
diff --git a/compose/plan.go b/compose/plan.go
index 58da583..23f5bcb 100644
--- a/compose/plan.go
+++ b/compose/plan.go
@@ -138,10 +138,7 @@ func refuseUnsupportedFields(proj *composetypes.Project) error {
 			})
 		}
 		if svc.Deploy != nil {
-			found = append(found, UnsupportedField{
-				Service: name, Field: "deploy",
-				Reason: "Swarm orchestration; not implemented",
-			})
+			found = append(found, deployUnsupported(name, svc.Deploy)...)
 		}
 		if svc.Develop != nil {
 			found = append(found, UnsupportedField{
@@ -300,3 +297,102 @@ func refuseSharedVolumes(proj *composetypes.Project) error {
 	}
 	return nil
 }
+
+// deployUnsupported collects refusals for sub-fields of deploy: that
+// this orchestrator can't honor. We accept deploy when it only carries
+// resources.limits with memory/cpus — that's how compose v3+ users
+// express per-service resource limits and it maps cleanly onto
+// RunSpec.MemoryBytes / RunSpec.NanoCPUs. The no-op values mode:
+// replicated and replicas: 1 are also let through. Everything else
+// (placement, update_config, rollback_config, restart_policy,
+// endpoint_mode, labels, resources.reservations, other limits) is
+// Swarm-flavored and refused with a reason naming what to drop.
+func deployUnsupported(service string, d *composetypes.DeployConfig) []UnsupportedField {
+	var out []UnsupportedField
+	if m := d.Mode; m != "" && m != "replicated" {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.mode",
+			Reason: "only the implicit single-replica mode is supported",
+		})
+	}
+	if r := d.Replicas; r != nil && *r != 1 {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.replicas",
+			Reason: "multi-replica services are not supported",
+		})
+	}
+	if len(d.Labels) > 0 {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.labels",
+			Reason: "use service-level labels instead",
+		})
+	}
+	if d.UpdateConfig != nil {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.update_config",
+			Reason: "Swarm rolling-update; not implemented",
+		})
+	}
+	if d.RollbackConfig != nil {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.rollback_config",
+			Reason: "Swarm rolling-update; not implemented",
+		})
+	}
+	if d.RestartPolicy != nil {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.restart_policy",
+			Reason: "use the top-level restart: field instead",
+		})
+	}
+	if d.EndpointMode != "" {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.endpoint_mode",
+			Reason: "Swarm load balancing; not implemented",
+		})
+	}
+	if len(d.Placement.Constraints) > 0 || len(d.Placement.Preferences) > 0 || d.Placement.MaxReplicas != 0 {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.placement",
+			Reason: "Swarm scheduling; not implemented",
+		})
+	}
+	out = append(out, resourcesUnsupported(service, d.Resources)...)
+	return out
+}
+
+// resourcesUnsupported refuses anything inside deploy.resources beyond
+// limits.memory and limits.cpus. Reservations would otherwise be
+// silently dropped (apple has no equivalent; docker honors them but
+// we don't currently translate them), so refusing them surfaces the
+// loss to the user instead of hiding it.
+func resourcesUnsupported(service string, r composetypes.Resources) []UnsupportedField {
+	var out []UnsupportedField
+	if r.Reservations != nil {
+		out = append(out, UnsupportedField{
+			Service: service, Field: "deploy.resources.reservations",
+			Reason: "soft-limit reservations are not honored on this runtime",
+		})
+	}
+	if r.Limits != nil {
+		if r.Limits.Pids != 0 {
+			out = append(out, UnsupportedField{
+				Service: service, Field: "deploy.resources.limits.pids",
+				Reason: "pids limit is not implemented",
+			})
+		}
+		if len(r.Limits.Devices) > 0 {
+			out = append(out, UnsupportedField{
+				Service: service, Field: "deploy.resources.limits.devices",
+				Reason: "device requests are not implemented",
+			})
+		}
+		if len(r.Limits.GenericResources) > 0 {
+			out = append(out, UnsupportedField{
+				Service: service, Field: "deploy.resources.limits.generic_resources",
+				Reason: "generic resources are not implemented",
+			})
+		}
+	}
+	return out
+}
diff --git a/compose/plan_test.go b/compose/plan_test.go
index 70ed0af..8d10441 100644
--- a/compose/plan_test.go
+++ b/compose/plan_test.go
@@ -52,7 +52,7 @@ func TestValidate_RefusesSwarmFields(t *testing.T) {
 			"app": composetypes.ServiceConfig{
 				Name:   "app",
 				Image:  "alpine",
-				Deploy: &composetypes.DeployConfig{},
+				Deploy: &composetypes.DeployConfig{Mode: "global"},
 			},
 		},
 	}
diff --git a/runtime/applecontainer/lifecycle_darwin_arm64.go b/runtime/applecontainer/lifecycle_darwin_arm64.go
index 91f9737..ed0a606 100644
--- a/runtime/applecontainer/lifecycle_darwin_arm64.go
+++ b/runtime/applecontainer/lifecycle_darwin_arm64.go
@@ -38,6 +38,8 @@ type runSpecJSON struct {
 	InitProcess     bool              `json:"initProcess,omitempty"`
 	CapAdd          []string          `json:"capAdd,omitempty"`
 	OverrideCommand bool              `json:"overrideCommand,omitempty"`
+	MemoryBytes     int64             `json:"memoryBytes,omitempty"`
+	NanoCPUs        int64             `json:"nanoCPUs,omitempty"`
 }
 
 type mountJSON struct {
@@ -216,6 +218,8 @@ func runSpecToWire(spec runtime.RunSpec) runSpecJSON {
 		InitProcess:     spec.Init,
 		CapAdd:          spec.CapAdd,
 		OverrideCommand: spec.OverrideCommand,
+		MemoryBytes:     spec.MemoryBytes,
+		NanoCPUs:        spec.NanoCPUs,
 	}
 	return out
 }
diff --git a/runtime/docker/run.go b/runtime/docker/run.go
index 847611b..2c4f6d8 100644
--- a/runtime/docker/run.go
+++ b/runtime/docker/run.go
@@ -57,6 +57,10 @@ func (r *Runtime) RunContainer(ctx context.Context, spec runtime.RunSpec) (*runt
 		SecurityOpt:   spec.SecurityOpt,
 		PortBindings:  bindings,
 		RestartPolicy: toRestartPolicy(spec.RestartPolicy),
+		Resources: container.Resources{
+			Memory:   spec.MemoryBytes,
+			NanoCPUs: spec.NanoCPUs,
+		},
 	}
 	if spec.Init {
 		t := true
diff --git a/runtime/runtime.go b/runtime/runtime.go
index 796d60b..8f3784f 100644
--- a/runtime/runtime.go
+++ b/runtime/runtime.go
@@ -348,6 +348,26 @@ type RunSpec struct {
 	// OverrideCommand, when true, forces Cmd to be ["/bin/sh","-c","while sleep 1000; do :; done"]
 	// so the container stays alive for exec-based interaction. Spec default true.
 	OverrideCommand bool
+
+	// MemoryBytes is the hard memory limit for the container, in bytes.
+	// Zero means "unset": the backend's own default applies — for docker
+	// that's no cgroup limit; for apple it's the apiserver's per-VM
+	// default (1 GiB on 0.12.x). Negative values are rejected by the
+	// backend.
+	//
+	// On apple, this sizes the per-container VM at boot; the guest
+	// kernel sees exactly this much memory and the value cannot be
+	// resized without container recreation. On docker, this maps to
+	// HostConfig.Memory and is enforced by cgroups.
+	MemoryBytes int64
+
+	// NanoCPUs is the CPU limit expressed in nano-units: 1_000_000_000
+	// = one full CPU, 2_500_000_000 = 2.5 CPUs. Matches docker's
+	// HostConfig.NanoCPUs convention so a single field works across
+	// backends. Zero means "unset". Apple's apiserver takes an integer
+	// CPU count, so the value is rounded up to the next whole CPU at
+	// the bridge boundary (e.g. 1_500_000_000 → 2 cpus).
+	NanoCPUs int64
 }
 
 // PortBinding describes a host->container port publish. Translates