From 400f1febc2d0854d78e7027279ef903bfebf5eaf Mon Sep 17 00:00:00 2001 From: bilby91 Date: Sat, 16 May 2026 12:50:48 -0300 Subject: [PATCH] runtime: add per-container memory + CPU limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds RunSpec.MemoryBytes (int64) and RunSpec.NanoCPUs (int64) so callers can size each container's resources. Docker enforces via cgroups (HostConfig.Memory + NanoCPUs); apple enforces by sizing the per-container VM at boot (ContainerConfiguration.resources.cpus + memoryInBytes). Apple's apiserver takes integer CPU counts, so the bridge rounds fractional nano-cpus up to the next whole CPU. Compose orchestrator reads from deploy.resources.limits (memory + cpus) with a fallback to the legacy top-level mem_limit / cpus, which matches docker compose's own precedence. Loosens the §2.2 refusal of deploy: — we now accept deploy when it only carries resources.limits with memory/cpus. Anything else inside deploy (replicas, mode, placement, update_config, rollback_config, restart_policy, endpoint_mode, labels, reservations, pids, devices, generic_resources) keeps its specific typed refusal so users see what to drop. Updates the two existing tests that used an empty Deploy{} as a refusal trigger to use Deploy{Mode: "global"} instead. Motivation: bringing up the dap monorepo on the applecontainer backend, pnpm install on the app service got OOM-killed at apple's 1 GiB per-VM default. Without RunSpec resource fields, callers had no way to raise the limit through this library. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Sources/ACBridge/lifecycle.swift | 25 +++++ compose/orchestrator.go | 26 +++++ compose/orchestrator_test.go | 86 ++++++++++++++- compose/plan.go | 104 +++++++++++++++++- compose/plan_test.go | 2 +- .../applecontainer/lifecycle_darwin_arm64.go | 4 + runtime/docker/run.go | 4 + runtime/runtime.go | 20 ++++ 8 files changed, 265 insertions(+), 6 deletions(-) diff --git a/applecontainer-bridge/Sources/ACBridge/lifecycle.swift b/applecontainer-bridge/Sources/ACBridge/lifecycle.swift index ec58f10..97f25d5 100644 --- a/applecontainer-bridge/Sources/ACBridge/lifecycle.swift +++ b/applecontainer-bridge/Sources/ACBridge/lifecycle.swift @@ -30,6 +30,14 @@ private struct RunSpecJSON: Decodable { var initProcess: Bool? var capAdd: [String]? var overrideCommand: Bool? + // Hard memory limit for the per-container VM, in bytes. Zero or + // absent leaves apple's default (1 GiB on 0.12.x) in place. + var memoryBytes: Int64? + // CPU limit in nano-units (1_000_000_000 = 1 CPU). Apple's + // apiserver takes an integer CPU count, so the bridge rounds up + // to the next whole CPU. Zero or absent leaves apple's default (4) + // in place. + var nanoCPUs: Int64? } private struct MountJSON: Decodable { @@ -106,6 +114,23 @@ private func runContainer(spec: RunSpecJSON) async throws { cfg.mounts = try (spec.mounts ?? []).map(toFilesystem) cfg.capAdd = spec.capAdd ?? [] cfg.useInit = spec.initProcess ?? false + // Resource limits. Apply only when caller specified a value; + // leave apple's Resources defaults (4 cpus / 1 GiB) untouched + // otherwise. Zero or negative inputs are skipped (treated as unset); + // the Go side is expected to reject negative values before this. + if let mem = spec.memoryBytes, mem > 0 { + cfg.resources.memoryInBytes = UInt64(mem) + } + if let nano = spec.nanoCPUs, nano > 0 { + // Round up to the next whole CPU. NanoCPUs of 1_500_000_000 + // (1.5 cpus) → cpus = 2.
Apple's apiserver doesn't model + // fractional CPU shares; callers expressing a fractional + // limit get the next whole CPU rather than a silent floor. + // Divide before rounding: `nano + 999_999_999` would trap on + // Int64 overflow (crashing the bridge) for inputs near Int64.max. + let wholeCPUs = nano / 1_000_000_000 + cfg.resources.cpus = Int(nano % 1_000_000_000 == 0 ? wholeCPUs : wholeCPUs + 1) + } // Enable Rosetta when running an amd64 container on an arm64 // host. Without this flag the apiserver rejects amd64 containers // with "unsupported: platform linux/amd64". Mirrors diff --git a/compose/orchestrator.go b/compose/orchestrator.go index ab5fac7..2c0fd8e 100644 --- a/compose/orchestrator.go +++ b/compose/orchestrator.go @@ -692,6 +692,7 @@ func serviceToRunSpec( }) } + memBytes, nanoCPUs := resourcesOf(svc) return runtime.RunSpec{ Image: svc.Image, Name: plan.ProjectName + "-" + svc.Name + "-1", @@ -708,9 +709,34 @@ func serviceToRunSpec( HealthCheck: healthCheckOf(svc.HealthCheck), Init: svc.Init != nil && *svc.Init, CapAdd: svc.CapAdd, + MemoryBytes: memBytes, + NanoCPUs: nanoCPUs, } } +// resourcesOf extracts the memory + CPU limits from a compose service. +// deploy.resources.limits (compose v3+) wins over the legacy top-level +// mem_limit / cpus fields when both are set, matching docker compose's +// own precedence. Zero values mean "unset" — the backend's default +// applies. +func resourcesOf(svc composetypes.ServiceConfig) (memBytes, nanoCPUs int64) { + if d := svc.Deploy; d != nil { + if lim := d.Resources.Limits; lim != nil { + memBytes = int64(lim.MemoryBytes) + if cpus := lim.NanoCPUs.Value(); cpus > 0 { + nanoCPUs = int64(cpus * 1_000_000_000) + } + } + } + if memBytes == 0 { + memBytes = int64(svc.MemLimit) + } + if nanoCPUs == 0 && svc.CPUS > 0 { + nanoCPUs = int64(svc.CPUS * 1_000_000_000) + } + return memBytes, nanoCPUs +} + // healthCheckOf translates compose's HealthCheckConfig pointer into // our runtime-neutral spec. Returns nil if the service didn't // declare one (image's HEALTHCHECK applies as-is).
diff --git a/compose/orchestrator_test.go b/compose/orchestrator_test.go index d6659c4..dcbbb2c 100644 --- a/compose/orchestrator_test.go +++ b/compose/orchestrator_test.go @@ -521,7 +521,7 @@ func TestUp_RefusesUnsupportedFields(t *testing.T) { orch := NewOrchestrator(rt, "docker") proj := &composetypes.Project{ Services: composetypes.Services{ - "app": composetypes.ServiceConfig{Name: "app", Image: "alpine", Deploy: &composetypes.DeployConfig{}}, + "app": composetypes.ServiceConfig{Name: "app", Image: "alpine", Deploy: &composetypes.DeployConfig{Mode: "global"}}, }, } _, err := orch.Up(context.Background(), &Plan{Project: proj, ProjectName: "dc-x"}) @@ -695,6 +695,90 @@ func TestUp_AnonymousVolumesFlowThrough(t *testing.T) { } } +// TestUp_ResourceLimitsTranslate pins the compose-to-RunSpec mapping +// for memory and CPU limits, including the deploy.resources.limits > +// legacy mem_limit/cpus precedence. Backends translate from RunSpec; +// this test pins the orchestrator side. +func TestUp_ResourceLimitsTranslate(t *testing.T) { + cases := []struct { + name string + mut func(*composetypes.ServiceConfig) + wantMem int64 + wantNano int64 + }{ + { + name: "deploy_limits", + mut: func(s *composetypes.ServiceConfig) { + s.Deploy = &composetypes.DeployConfig{ + Resources: composetypes.Resources{ + Limits: &composetypes.Resource{ + MemoryBytes: composetypes.UnitBytes(2 * 1024 * 1024 * 1024), + NanoCPUs: composetypes.NanoCPUs(2.5), + }, + }, + } + }, + wantMem: 2 * 1024 * 1024 * 1024, + wantNano: 2_500_000_000, + }, + { + name: "legacy_only", + mut: func(s *composetypes.ServiceConfig) { + s.MemLimit = composetypes.UnitBytes(512 * 1024 * 1024) + s.CPUS = 1.5 + }, + wantMem: 512 * 1024 * 1024, + wantNano: 1_500_000_000, + }, + { + name: "deploy_overrides_legacy", + mut: func(s *composetypes.ServiceConfig) { + s.MemLimit = composetypes.UnitBytes(128 * 1024 * 1024) + s.CPUS = 1.0 + s.Deploy = &composetypes.DeployConfig{ + Resources: composetypes.Resources{ + Limits: 
&composetypes.Resource{ + MemoryBytes: composetypes.UnitBytes(4 * 1024 * 1024 * 1024), + NanoCPUs: composetypes.NanoCPUs(4), + }, + }, + } + }, + wantMem: 4 * 1024 * 1024 * 1024, + wantNano: 4_000_000_000, + }, + { + name: "unset", + mut: func(*composetypes.ServiceConfig) {}, + wantMem: 0, + wantNano: 0, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + rt := newMockRuntime() + var seen runtime.RunSpec + rt.OnRunContainer = func(spec runtime.RunSpec) (*runtime.Container, error) { + seen = spec + return nil, nil + } + orch := NewOrchestrator(rt, "docker") + svc := composetypes.ServiceConfig{Name: "app", Image: "alpine"} + tc.mut(&svc) + proj := &composetypes.Project{Services: composetypes.Services{"app": svc}} + if _, err := orch.Up(context.Background(), &Plan{Project: proj, ProjectName: "dc-x"}); err != nil { + t.Fatalf("Up: %v", err) + } + if seen.MemoryBytes != tc.wantMem { + t.Errorf("MemoryBytes = %d, want %d", seen.MemoryBytes, tc.wantMem) + } + if seen.NanoCPUs != tc.wantNano { + t.Errorf("NanoCPUs = %d, want %d", seen.NanoCPUs, tc.wantNano) + } + }) + } +} + // TestDown_RemovesProjectNetwork pins the network-cleanup contract. // Up creates _default; Down must call RemoveNetwork on it // after containers are gone. Without this, every devcontainer diff --git a/compose/plan.go b/compose/plan.go index 58da583..23f5bcb 100644 --- a/compose/plan.go +++ b/compose/plan.go @@ -138,10 +138,7 @@ func refuseUnsupportedFields(proj *composetypes.Project) error { }) } if svc.Deploy != nil { - found = append(found, UnsupportedField{ - Service: name, Field: "deploy", - Reason: "Swarm orchestration; not implemented", - }) + found = append(found, deployUnsupported(name, svc.Deploy)...) 
} if svc.Develop != nil { found = append(found, UnsupportedField{ @@ -300,3 +297,102 @@ func refuseSharedVolumes(proj *composetypes.Project) error { } return nil } + +// deployUnsupported collects refusals for sub-fields of deploy: that +// this orchestrator can't honor. We accept deploy when it only carries +// resources.limits with memory/cpus — that's how compose v3+ users +// express per-service resource limits and it maps cleanly onto +// RunSpec.MemoryBytes / RunSpec.NanoCPUs. Everything else inside +// deploy: (replicas, mode, placement, update_config, rollback_config, +// restart_policy, endpoint_mode, labels, resources.reservations, +// non-memory/cpu limits) is Swarm-flavored and refused with a specific +// reason so the user sees what they need to drop. +func deployUnsupported(service string, d *composetypes.DeployConfig) []UnsupportedField { + var out []UnsupportedField + if m := d.Mode; m != "" && m != "replicated" { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.mode", + Reason: "only the implicit single-replica mode is supported", + }) + } + if r := d.Replicas; r != nil && *r != 1 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.replicas", + Reason: "multi-replica services are not supported", + }) + } + if len(d.Labels) > 0 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.labels", + Reason: "use service-level labels instead", + }) + } + if d.UpdateConfig != nil { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.update_config", + Reason: "Swarm rolling-update; not implemented", + }) + } + if d.RollbackConfig != nil { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.rollback_config", + Reason: "Swarm rolling-update; not implemented", + }) + } + if d.RestartPolicy != nil { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.restart_policy", + Reason: "use the top-level restart: field instead", + }) + } + if 
d.EndpointMode != "" { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.endpoint_mode", + Reason: "Swarm load balancing; not implemented", + }) + } + if len(d.Placement.Constraints) > 0 || len(d.Placement.Preferences) > 0 || d.Placement.MaxReplicas != 0 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.placement", + Reason: "Swarm scheduling; not implemented", + }) + } + out = append(out, resourcesUnsupported(service, d.Resources)...) + return out +} + +// resourcesUnsupported refuses anything inside deploy.resources beyond +// limits.memory and limits.cpus. Reservations are silently dropped on +// our runtimes today (apple has no equivalent; docker honors them but +// we don't currently translate them), so refusing them surfaces the +// silent loss to the user. +func resourcesUnsupported(service string, r composetypes.Resources) []UnsupportedField { + var out []UnsupportedField + if r.Reservations != nil { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.resources.reservations", + Reason: "soft-limit reservations are not honored on this runtime", + }) + } + if r.Limits != nil { + if r.Limits.Pids != 0 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.resources.limits.pids", + Reason: "pids limit is not implemented", + }) + } + if len(r.Limits.Devices) > 0 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.resources.limits.devices", + Reason: "device requests are not implemented", + }) + } + if len(r.Limits.GenericResources) > 0 { + out = append(out, UnsupportedField{ + Service: service, Field: "deploy.resources.limits.generic_resources", + Reason: "generic resources are not implemented", + }) + } + } + return out +} diff --git a/compose/plan_test.go b/compose/plan_test.go index 70ed0af..8d10441 100644 --- a/compose/plan_test.go +++ b/compose/plan_test.go @@ -52,7 +52,7 @@ func TestValidate_RefusesSwarmFields(t *testing.T) { "app": 
composetypes.ServiceConfig{ Name: "app", Image: "alpine", - Deploy: &composetypes.DeployConfig{}, + Deploy: &composetypes.DeployConfig{Mode: "global"}, }, }, } diff --git a/runtime/applecontainer/lifecycle_darwin_arm64.go b/runtime/applecontainer/lifecycle_darwin_arm64.go index 91f9737..ed0a606 100644 --- a/runtime/applecontainer/lifecycle_darwin_arm64.go +++ b/runtime/applecontainer/lifecycle_darwin_arm64.go @@ -38,6 +38,8 @@ type runSpecJSON struct { InitProcess bool `json:"initProcess,omitempty"` CapAdd []string `json:"capAdd,omitempty"` OverrideCommand bool `json:"overrideCommand,omitempty"` + MemoryBytes int64 `json:"memoryBytes,omitempty"` + NanoCPUs int64 `json:"nanoCPUs,omitempty"` } type mountJSON struct { @@ -216,6 +218,8 @@ func runSpecToWire(spec runtime.RunSpec) runSpecJSON { InitProcess: spec.Init, CapAdd: spec.CapAdd, OverrideCommand: spec.OverrideCommand, + MemoryBytes: spec.MemoryBytes, + NanoCPUs: spec.NanoCPUs, } return out } diff --git a/runtime/docker/run.go b/runtime/docker/run.go index 847611b..2c4f6d8 100644 --- a/runtime/docker/run.go +++ b/runtime/docker/run.go @@ -57,6 +57,10 @@ func (r *Runtime) RunContainer(ctx context.Context, spec runtime.RunSpec) (*runt SecurityOpt: spec.SecurityOpt, PortBindings: bindings, RestartPolicy: toRestartPolicy(spec.RestartPolicy), + Resources: container.Resources{ + Memory: spec.MemoryBytes, + NanoCPUs: spec.NanoCPUs, + }, } if spec.Init { t := true diff --git a/runtime/runtime.go b/runtime/runtime.go index 796d60b..8f3784f 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -348,6 +348,26 @@ type RunSpec struct { // OverrideCommand, when true, forces Cmd to be ["/bin/sh","-c","while sleep 1000; do :; done"] // so the container stays alive for exec-based interaction. Spec default true. OverrideCommand bool + + // MemoryBytes is the hard memory limit for the container, in bytes. 
+ // Zero means "unset": the backend's own default applies — for docker + // that's no cgroup limit; for apple it's the apiserver's per-VM + // default (1 GiB on 0.12.x). Negative values are rejected by the + // backend. + // + // On apple, this sizes the per-container VM at boot; the guest + // kernel sees exactly this much memory and the value cannot be + // resized without container recreation. On docker, this maps to + // HostConfig.Memory and is enforced by cgroups. + MemoryBytes int64 + + // NanoCPUs is the CPU limit expressed in nano-units: 1_000_000_000 + // = one full CPU, 2_500_000_000 = 2.5 CPUs. Matches docker's + // HostConfig.NanoCPUs convention so a single field works across + // backends. Zero means "unset". Apple's apiserver takes an integer + // CPU count, so the value is rounded up to the next whole CPU at + // the bridge boundary (e.g. 1_500_000_000 → 2 cpus). + NanoCPUs int64 } // PortBinding describes a host->container port publish. Translates