From 380703ba225bb7395f9504b97c3f6098aab31c28 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Sun, 14 Jun 2026 10:26:43 +0400 Subject: [PATCH 01/11] =?UTF-8?q?feat(monetize):=20ServiceOffer=20type=3Dd?= =?UTF-8?q?ataset=20=E2=80=94=20CRD,=20catalog=20&=20x402=20extras=20wirin?= =?UTF-8?q?g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 of the decentralized fine-tuning plan: make a versioned dataset a first-class type=dataset ServiceOffer so the existing controller -> Middleware -> HTTPRoute -> ForwardAuth -> catalog pipeline publishes and gates it with no new serving code. Declarations + pipeline wiring + parity tests only; the dataset server/versionlog/download client are later phases. - CRD: dataset enum value; ServiceOfferDataset{manifestHash,version, fileHash,sizeBytes} spec block (mirrors ServiceOfferAgent); PriceTable.PerMB; IsDataset(); regenerated serviceoffer-crd.yaml + deepcopy. - x402: RouteRule.Dataset* fields; routeRuleFromOffer dataset branch (hex digests lowercased); effectivePrice perMB; mergeDatasetExtras() adds accepts[].extra.dataset{...} to the 402, wired after mergeAgentExtras. - catalog: schema type enum + perMB priceUnit + additive dataset* properties; ServiceCatalogEntry.Dataset*; buildServiceCatalogJSON population; perMB in offerPriceRawAndUnit + describeOfferPrice. - dataset folds to the http render branch in normalizeOfferType (a download is not chat-completions): generic 402 copy + bazaarGenericJSON, no bespoke copy. Version metadata reaches buyers only via extra.dataset. - parity tests across CRD fields/block, mergeDatasetExtras, route-rule, catalog surface + omitempty, bazaar, fallbackOfferType, describeOfferPrice (incl. perMB precedence), and the HTML 402 copy fold. go build/vet, full go test ./..., and just generate (idempotent) all green. 
--- internal/embed/embed_crd_test.go | 58 +++++++++++- .../base/templates/serviceoffer-crd.yaml | 35 +++++++- internal/monetizeapi/types.go | 41 ++++++++- internal/monetizeapi/zz_generated.deepcopy.go | 16 ++++ internal/schemas/service-catalog.schema.json | 21 ++++- internal/schemas/service_catalog.go | 9 ++ internal/serviceoffercontroller/render.go | 16 +++- .../render_builders_test.go | 33 +++++++ .../serviceoffercontroller/render_test.go | 90 +++++++++++++++++++ internal/x402/bazaar_test.go | 3 +- internal/x402/config.go | 9 ++ internal/x402/dataset_extras_test.go | 89 ++++++++++++++++++ internal/x402/paymentrequired_test.go | 22 +++++ internal/x402/serviceoffer_source.go | 11 +++ internal/x402/serviceoffer_source_test.go | 50 +++++++++++ internal/x402/verifier.go | 28 ++++++ 16 files changed, 518 insertions(+), 13 deletions(-) create mode 100644 internal/x402/dataset_extras_test.go diff --git a/internal/embed/embed_crd_test.go b/internal/embed/embed_crd_test.go index e8b45085..67f3b7e9 100644 --- a/internal/embed/embed_crd_test.go +++ b/internal/embed/embed_crd_test.go @@ -149,8 +149,9 @@ func TestServiceOfferCRD_Fields(t *testing.T) { } // Required fields in spec (aligned with x402/ERC-8004 schema). agent - // joins this list as part of the type=agent offer flow. - for _, field := range []string{"type", "agent", "model", "upstream", "payment", "path", "registration"} { + // joins this list as part of the type=agent offer flow; dataset joins + // it for the type=dataset offer flow. 
+ for _, field := range []string{"type", "agent", "dataset", "model", "upstream", "payment", "path", "registration"} { if _, exists := pm[field]; !exists { t.Errorf("spec.properties missing field %q", field) } @@ -162,13 +163,64 @@ func TestServiceOfferCRD_Fields(t *testing.T) { for _, v := range enum { got[v.(string)] = true } - for _, want := range []string{"inference", "fine-tuning", "http", "agent"} { + for _, want := range []string{"inference", "fine-tuning", "http", "agent", "dataset"} { if !got[want] { t.Errorf("spec.type.enum missing %q", want) } } } +// TestServiceOfferCRD_DatasetFields pins the type=dataset schema surface: +// the spec.dataset block carries the pinned artifact metadata (mirroring +// spec.agent), and price.perMB enables per-megabyte pricing. Mirrors +// TestServiceOfferCRD_Fields' navigation. +func TestServiceOfferCRD_DatasetFields(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/serviceoffer-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CRD document found") + } + + versions, ok := nested(crd, "spec", "versions").([]any) + if !ok || len(versions) == 0 { + t.Fatal("spec.versions is empty or wrong type") + } + v0, ok := versions[0].(map[string]any) + if !ok { + t.Fatal("versions[0] is not a map") + } + + specProps, ok := nested(v0, "schema", "openAPIV3Schema", "properties", "spec", "properties").(map[string]any) + if !ok { + t.Fatal("spec.properties is not a map") + } + + datasetProps, ok := nested(specProps, "dataset", "properties").(map[string]any) + if !ok { + t.Fatal("spec.dataset.properties missing — type=dataset offers can't pin a version") + } + for _, field := range []string{"manifestHash", "version", "fileHash", "sizeBytes"} { + if _, exists := datasetProps[field]; !exists { + t.Errorf("spec.dataset.properties missing field %q", field) + } + } + if sb, ok := 
datasetProps["sizeBytes"].(map[string]any); ok && sb["type"] != "integer" { + t.Errorf("spec.dataset.sizeBytes type = %v, want integer", sb["type"]) + } + + priceProps, ok := nested(specProps, "payment", "properties", "price", "properties").(map[string]any) + if !ok { + t.Fatal("spec.payment.price.properties missing") + } + if _, exists := priceProps["perMB"]; !exists { + t.Error("spec.payment.price.properties missing perMB — dataset offers can't price per-MB") + } +} + func TestServiceOfferCRD_PrinterColumns(t *testing.T) { data, err := ReadInfrastructureFile("base/templates/serviceoffer-crd.yaml") if err != nil { diff --git a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml index 7b67e13a..274cd539 100644 --- a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml +++ b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml @@ -79,6 +79,34 @@ spec: - namespace type: object type: object + dataset: + description: |- + Populated when type='dataset'. Pins the versioned dataset artifact + (an export bundle) the offer sells: the content-address anchor + (manifestHash), the published version, and the artifact size. The + controller surfaces these in the 402 response's extra.dataset block. + properties: + fileHash: + description: |- + SHA-256 of the served file, for whole-file integrity verification + after download. + type: string + manifestHash: + description: |- + Content-address anchor of the artifact: the export bundle + manifestHash (SHA-256 over the artifact contents). + type: string + sizeBytes: + description: Size of the served artifact in bytes. Drives per-MB + pricing. + format: int64 + minimum: 0 + type: integer + version: + description: Monotonic published version tag of the dataset (e.g. + "1", "2"). + type: string + type: object drainAt: description: |- DrainAt marks the offer as draining when non-nil. 
While the offer @@ -178,6 +206,9 @@ spec: perHour: description: Per-compute-hour price in USDC. Fine-tuning only. type: string + perMB: + description: Per-megabyte price in USDC. Dataset only. + type: string perMTok: description: Per-million-tokens price in USDC. Inference only. type: string @@ -278,12 +309,14 @@ spec: description: |- Service type. 'inference' enables model management; 'http' for any HTTP service; 'agent' references an Agent CR via spec.agent.ref and the - controller derives upstream + model + skills from the agent's status. + controller derives upstream + model + skills from the agent's status; + 'dataset' sells a versioned dataset artifact via spec.dataset. enum: - inference - fine-tuning - http - agent + - dataset type: string upstream: description: In-cluster service that handles the actual workload. diff --git a/internal/monetizeapi/types.go b/internal/monetizeapi/types.go index 2efb439b..18db62ae 100644 --- a/internal/monetizeapi/types.go +++ b/internal/monetizeapi/types.go @@ -101,9 +101,10 @@ type ServiceOfferList struct { type ServiceOfferSpec struct { // Service type. 'inference' enables model management; 'http' for any HTTP // service; 'agent' references an Agent CR via spec.agent.ref and the - // controller derives upstream + model + skills from the agent's status. + // controller derives upstream + model + skills from the agent's status; + // 'dataset' sells a versioned dataset artifact via spec.dataset. // +kubebuilder:default="http" - // +kubebuilder:validation:Enum=inference;fine-tuning;http;agent + // +kubebuilder:validation:Enum=inference;fine-tuning;http;agent;dataset Type string `json:"type,omitempty"` // Required when type='agent'. The controller resolves spec.agent.ref to @@ -111,6 +112,12 @@ type ServiceOfferSpec struct { // and surfaces the agent's pinned model + skills in the 402 response. Agent ServiceOfferAgent `json:"agent,omitempty"` + // Populated when type='dataset'. 
Pins the versioned dataset artifact + // (an export bundle) the offer sells: the content-address anchor + // (manifestHash), the published version, and the artifact size. The + // controller surfaces these in the 402 response's extra.dataset block. + Dataset ServiceOfferDataset `json:"dataset,omitempty"` + // LLM model metadata. Required when the upstream serves an LLM. Model ServiceOfferModel `json:"model,omitempty"` @@ -164,6 +171,24 @@ type ServiceOfferAgentRef struct { Namespace string `json:"namespace"` } +// ServiceOfferDataset is populated when Spec.Type == "dataset". It pins the +// versioned dataset artifact (an export bundle) the offer sells. The +// controller surfaces these fields in the 402 response's extra.dataset block +// so buyers see exactly which content-addressed version they're paying for. +type ServiceOfferDataset struct { + // Content-address anchor of the artifact: the export bundle + // manifestHash (SHA-256 over the artifact contents). + ManifestHash string `json:"manifestHash,omitempty"` + // Monotonic published version tag of the dataset (e.g. "1", "2"). + Version string `json:"version,omitempty"` + // SHA-256 of the served file, for whole-file integrity verification + // after download. + FileHash string `json:"fileHash,omitempty"` + // Size of the served artifact in bytes. Drives per-MB pricing. + // +kubebuilder:validation:Minimum=0 + SizeBytes int64 `json:"sizeBytes,omitempty"` +} + type ServiceOfferModel struct { // Model identifier (e.g. qwen3.5:35b). // +kubebuilder:validation:Required @@ -245,6 +270,8 @@ type ServiceOfferPriceTable struct { PerHour string `json:"perHour,omitempty"` // Per-training-epoch price in USDC. Fine-tuning only. PerEpoch string `json:"perEpoch,omitempty"` + // Per-megabyte price in USDC. Dataset only. 
+ PerMB string `json:"perMB,omitempty"` } type ServiceOfferRegistration struct { @@ -423,6 +450,13 @@ func (o *ServiceOffer) IsAgent() bool { return o.Spec.Type == "agent" } +// IsDataset reports whether the offer sells a versioned dataset artifact. +// Type=="dataset" is the only signal; spec.dataset carries the pinned +// version metadata surfaced in the 402 extra block. +func (o *ServiceOffer) IsDataset() bool { + return o.Spec.Type == "dataset" +} + // IsDraining reports whether spec.drainAt has been set. Drained offers // transition through three phases: pre-drain (DrainAt nil), draining // (DrainAt set, now < DrainEndsAt), and drain-expired (DrainAt set, @@ -724,8 +758,7 @@ type AgentIdentityList struct { Items []AgentIdentity `json:"items"` } -type AgentIdentitySpec struct { -} +type AgentIdentitySpec struct{} type AgentIdentityStatus struct { // Per-chain ERC-8004 registrations for this identity document. diff --git a/internal/monetizeapi/zz_generated.deepcopy.go b/internal/monetizeapi/zz_generated.deepcopy.go index 3c0207f3..a541cc81 100644 --- a/internal/monetizeapi/zz_generated.deepcopy.go +++ b/internal/monetizeapi/zz_generated.deepcopy.go @@ -570,6 +570,21 @@ func (in *ServiceOfferAsset) DeepCopy() *ServiceOfferAsset { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceOfferDataset) DeepCopyInto(out *ServiceOfferDataset) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceOfferDataset. +func (in *ServiceOfferDataset) DeepCopy() *ServiceOfferDataset { + if in == nil { + return nil + } + out := new(ServiceOfferDataset) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ServiceOfferList) DeepCopyInto(out *ServiceOfferList) { *out = *in @@ -710,6 +725,7 @@ func (in *ServiceOfferService) DeepCopy() *ServiceOfferService { func (in *ServiceOfferSpec) DeepCopyInto(out *ServiceOfferSpec) { *out = *in out.Agent = in.Agent + out.Dataset = in.Dataset out.Model = in.Model out.Upstream = in.Upstream out.Payment = in.Payment diff --git a/internal/schemas/service-catalog.schema.json b/internal/schemas/service-catalog.schema.json index 5a9808d2..1184341a 100644 --- a/internal/schemas/service-catalog.schema.json +++ b/internal/schemas/service-catalog.schema.json @@ -95,7 +95,8 @@ "inference", "fine-tuning", "http", - "agent" + "agent", + "dataset" ] }, "model": { @@ -119,7 +120,8 @@ "perRequest", "perMTok", "perHour", - "perEpoch" + "perEpoch", + "perMB" ] }, "priceAtomicUnits": { @@ -165,6 +167,21 @@ "type": "string", "format": "date-time", "description": "RFC3339 timestamp at which the offer's HTTPRoute will be torn down. Set only when the offer is draining. Catalog consumers should detect drain via the presence of this field." + }, + "datasetManifestHash": { + "type": "string", + "minLength": 1, + "description": "type=dataset only: content-address anchor (export bundle manifestHash, SHA-256) of the sold artifact." + }, + "datasetVersion": { + "type": "string", + "minLength": 1, + "description": "type=dataset only: monotonic published version tag of the dataset." + }, + "datasetSizeBytes": { + "type": "integer", + "minimum": 0, + "description": "type=dataset only: size of the served artifact in bytes." } } } diff --git a/internal/schemas/service_catalog.go b/internal/schemas/service_catalog.go index 6423b31d..5a1baef7 100644 --- a/internal/schemas/service_catalog.go +++ b/internal/schemas/service_catalog.go @@ -53,6 +53,15 @@ type ServiceCatalogEntry struct { // purely additive vs. pre-drain catalogs. Buyers SHOULD migrate to // alternative providers before this time. 
DrainEndsAt string `json:"drainEndsAt,omitempty"` + + // Dataset* fields are populated for type=dataset offers, mirroring how + // Model is populated for inference/agent. They expose the pinned, + // content-addressed dataset version on discovery surfaces so buyers + // know exactly which artifact (and version) an offer sells. Additive + // only — see the stable-wire-schema note above. + DatasetManifestHash string `json:"datasetManifestHash,omitempty"` + DatasetVersion string `json:"datasetVersion,omitempty"` + DatasetSizeBytes int64 `json:"datasetSizeBytes,omitempty"` } // ServiceCatalogAsset describes the settlement token resolved for a catalog diff --git a/internal/serviceoffercontroller/render.go b/internal/serviceoffercontroller/render.go index 3e2506c6..af6ac4a3 100644 --- a/internal/serviceoffercontroller/render.go +++ b/internal/serviceoffercontroller/render.go @@ -1162,6 +1162,14 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) } } + // type=dataset offers surface the pinned, content-addressed version + // on discovery, mirroring how Model is surfaced for inference/agent. + if offer.IsDataset() { + svc.DatasetManifestHash = offer.Spec.Dataset.ManifestHash + svc.DatasetVersion = offer.Spec.Dataset.Version + svc.DatasetSizeBytes = offer.Spec.Dataset.SizeBytes + } + services = append(services, svc) } @@ -1173,8 +1181,8 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) } // offerPriceRawAndUnit returns the raw decimal price string and which slot it -// occupies in the price table. Only one of perRequest / perMTok / perHour is -// expected to be set on a given offer. +// occupies in the price table. Only one of perRequest / perMTok / perHour / +// perMB is expected to be set on a given offer. 
func offerPriceRawAndUnit(offer *monetizeapi.ServiceOffer) (string, string) { switch { case offer.Spec.Payment.Price.PerRequest != "": @@ -1183,6 +1191,8 @@ func offerPriceRawAndUnit(offer *monetizeapi.ServiceOffer) (string, string) { return offer.Spec.Payment.Price.PerMTok, "perMTok" case offer.Spec.Payment.Price.PerHour != "": return offer.Spec.Payment.Price.PerHour, "perHour" + case offer.Spec.Payment.Price.PerMB != "": + return offer.Spec.Payment.Price.PerMB, "perMB" default: return "", "" } @@ -1352,6 +1362,8 @@ func describeOfferPrice(offer *monetizeapi.ServiceOffer) string { return offer.Spec.Payment.Price.PerMTok + " " + symbol + "/MTok" case offer.Spec.Payment.Price.PerHour != "": return offer.Spec.Payment.Price.PerHour + " " + symbol + "/hour" + case offer.Spec.Payment.Price.PerMB != "": + return offer.Spec.Payment.Price.PerMB + " " + symbol + "/MB" default: return "—" } diff --git a/internal/serviceoffercontroller/render_builders_test.go b/internal/serviceoffercontroller/render_builders_test.go index 5c3bf105..c750f135 100644 --- a/internal/serviceoffercontroller/render_builders_test.go +++ b/internal/serviceoffercontroller/render_builders_test.go @@ -312,6 +312,37 @@ func TestDescribeOfferPrice(t *testing.T) { }, want: "0.001 USDC/request", }, + { + name: "per-mb label for dataset offers", + spec: monetizeapi.ServiceOfferSpec{ + Payment: monetizeapi.ServiceOfferPayment{ + Price: monetizeapi.ServiceOfferPriceTable{PerMB: "0.01"}, + }, + }, + want: "0.01 USDC/MB", + }, + { + // perMB is last in the precedence chain + // (perRequest > perMTok > perHour > perMB): a malformed offer + // with both set must surface the higher-priority unit. 
+ name: "per-hour wins over per-mb", + spec: monetizeapi.ServiceOfferSpec{ + Payment: monetizeapi.ServiceOfferPayment{ + Price: monetizeapi.ServiceOfferPriceTable{PerHour: "2.5", PerMB: "0.01"}, + }, + }, + want: "2.5 USDC/hour", + }, + { + name: "per-mb surfaces the OBOL symbol", + spec: monetizeapi.ServiceOfferSpec{ + Payment: monetizeapi.ServiceOfferPayment{ + Asset: monetizeapi.ServiceOfferAsset{Symbol: "OBOL"}, + Price: monetizeapi.ServiceOfferPriceTable{PerMB: "0.01"}, + }, + }, + want: "0.01 OBOL/MB", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -401,6 +432,8 @@ func TestFallbackOfferType(t *testing.T) { {"inference", "inference"}, {"http", "http"}, {"fine-tuning", "fine-tuning"}, + {"agent", "agent"}, + {"dataset", "dataset"}, } for _, tt := range tests { t.Run(tt.in, func(t *testing.T) { diff --git a/internal/serviceoffercontroller/render_test.go b/internal/serviceoffercontroller/render_test.go index a68d7061..395200eb 100644 --- a/internal/serviceoffercontroller/render_test.go +++ b/internal/serviceoffercontroller/render_test.go @@ -850,6 +850,96 @@ func TestBuildServiceCatalogJSON_AgentOfferUsesResolvedModel(t *testing.T) { } } +// TestBuildServiceCatalogJSON_DatasetOfferSurfacesVersion pins that a +// type=dataset offer surfaces its pinned, content-addressed version + per-MB +// price on the public catalog, mirroring how an agent offer surfaces its +// resolved model. 
+func TestBuildServiceCatalogJSON_DatasetOfferSurfacesVersion(t *testing.T) { + offer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "pi-sessions", Namespace: "llm"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "dataset", + Dataset: monetizeapi.ServiceOfferDataset{ + ManifestHash: "abc123", + Version: "2", + SizeBytes: 1048576, + }, + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base-sepolia", + PayTo: "0x1111111111111111111111111111111111111111", + Price: monetizeapi.ServiceOfferPriceTable{PerMB: "0.01"}, + }, + Registration: monetizeapi.ServiceOfferRegistration{ + Description: "Sanitized coding-session dataset", + }, + }, + Status: monetizeapi.ServiceOfferStatus{ + Conditions: []monetizeapi.Condition{{Type: "Ready", Status: "True"}}, + }, + } + + jsonStr := buildServiceCatalogJSON([]*monetizeapi.ServiceOffer{offer}, "https://seller.example") + assertServiceCatalogSchema(t, jsonStr) + + var services []schemas.ServiceCatalogEntry + if err := json.Unmarshal([]byte(jsonStr), &services); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, jsonStr) + } + if len(services) != 1 { + t.Fatalf("expected 1 service, got %d: %s", len(services), jsonStr) + } + svc := services[0] + if svc.Type != "dataset" { + t.Errorf("type = %q, want dataset", svc.Type) + } + if svc.DatasetManifestHash != "abc123" { + t.Errorf("datasetManifestHash = %q, want abc123", svc.DatasetManifestHash) + } + if svc.DatasetVersion != "2" { + t.Errorf("datasetVersion = %q, want 2", svc.DatasetVersion) + } + if svc.DatasetSizeBytes != 1048576 { + t.Errorf("datasetSizeBytes = %d, want 1048576", svc.DatasetSizeBytes) + } + if svc.Price != "0.01 USDC/MB" { + t.Errorf("price = %q, want 0.01 USDC/MB", svc.Price) + } + if svc.PriceUnit != "perMB" { + t.Errorf("priceUnit = %q, want perMB", svc.PriceUnit) + } +} + +// TestBuildServiceCatalogJSON_NonDatasetOmitsDatasetFields pins the omitempty +// contract: a non-dataset offer must never carry dataset metadata. 
+func TestBuildServiceCatalogJSON_NonDatasetOmitsDatasetFields(t *testing.T) { + offer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "infer", Namespace: "llm"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "inference", + Model: monetizeapi.ServiceOfferModel{Name: "qwen3.5:9b"}, + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base-sepolia", + PayTo: "0x1111111111111111111111111111111111111111", + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.001"}, + }, + }, + Status: monetizeapi.ServiceOfferStatus{ + Conditions: []monetizeapi.Condition{{Type: "Ready", Status: "True"}}, + }, + } + + jsonStr := buildServiceCatalogJSON([]*monetizeapi.ServiceOffer{offer}, "https://seller.example") + + var services []schemas.ServiceCatalogEntry + if err := json.Unmarshal([]byte(jsonStr), &services); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, jsonStr) + } + svc := services[0] + if svc.DatasetManifestHash != "" || svc.DatasetVersion != "" || svc.DatasetSizeBytes != 0 { + t.Errorf("inference offer must omit dataset fields, got %+v", svc) + } +} + // TestBuildServiceCatalogJSON_ExcludesNonReady locks in the filter pipeline: // nil offers, drain-expired offers, and offers with a DeletionTimestamp // must never leak onto the public storefront, even if they carry diff --git a/internal/x402/bazaar_test.go b/internal/x402/bazaar_test.go index e1269af0..9bb4b6ef 100644 --- a/internal/x402/bazaar_test.go +++ b/internal/x402/bazaar_test.go @@ -36,7 +36,8 @@ func TestBuildBazaarExtension(t *testing.T) { {"agent", "qwen3.5:9b", "qwen3.5:9b"}, {"inference", "", "your-model-id"}, {"http", "", ""}, - {"", "", ""}, // static config routes fall back to the generic shape + {"dataset", "training-v1", ""}, // dataset is a download → generic shape, no chat model + {"", "", ""}, // static config routes fall back to the generic shape } { ext := BuildBazaarExtension(tc.offerType, tc.model) diff --git a/internal/x402/config.go b/internal/x402/config.go index 
a878ac48..e6ac3f4c 100644 --- a/internal/x402/config.go +++ b/internal/x402/config.go @@ -129,6 +129,15 @@ type RouteRule struct { // `obol buy inference --model ...` command. Model string `yaml:"model,omitempty"` + // Dataset* fields carry the pinned dataset artifact metadata for + // type=dataset offers, mirroring how Agent* fields carry agent + // metadata. Surfaced in `accepts[].extra.dataset` so buyers see which + // content-addressed version they're paying for. + DatasetManifestHash string `yaml:"datasetManifestHash,omitempty"` + DatasetVersion string `yaml:"datasetVersion,omitempty"` + DatasetFileHash string `yaml:"datasetFileHash,omitempty"` + DatasetSizeBytes int64 `yaml:"datasetSizeBytes,omitempty"` + // MaxTimeoutSeconds is the per-request settle window advertised to // buyers (x402: maxTimeoutSeconds). Mirrors // ServiceOffer.spec.payment.maxTimeoutSeconds; 0 = use the verifier diff --git a/internal/x402/dataset_extras_test.go b/internal/x402/dataset_extras_test.go new file mode 100644 index 00000000..e3c580b0 --- /dev/null +++ b/internal/x402/dataset_extras_test.go @@ -0,0 +1,89 @@ +package x402 + +import ( + "testing" + + x402types "github.com/x402-foundation/x402/go/types" +) + +func TestMergeDatasetExtras_Noop_NonDatasetRule(t *testing.T) { + req := x402types.PaymentRequirements{Extra: map[string]any{"name": "USDC"}} + rule := &RouteRule{} + + mergeDatasetExtras(&req, rule) + + if _, ok := req.Extra["dataset"]; ok { + t.Error("non-dataset rule must not add a dataset extra") + } + if got := req.Extra["name"]; got != "USDC" { + t.Errorf("non-dataset merge clobbered existing extra.name: %v", got) + } +} + +func TestMergeDatasetExtras_AddsAllDatasetFields(t *testing.T) { + // Preserve an asset EIP-712 key already on Extra to prove the merge is + // additive (mirrors how mergeAgentExtras must not clobber extra.name). 
+ req := x402types.PaymentRequirements{Extra: map[string]any{"name": "USDC"}} + rule := &RouteRule{ + DatasetManifestHash: "abc123", + DatasetVersion: "2", + DatasetFileHash: "def456", + DatasetSizeBytes: 1048576, + } + + mergeDatasetExtras(&req, rule) + + dataset, ok := req.Extra["dataset"].(map[string]interface{}) + if !ok { + t.Fatalf("extra.dataset wrong type: %T", req.Extra["dataset"]) + } + if got := dataset["manifestHash"]; got != "abc123" { + t.Errorf("dataset.manifestHash = %v, want abc123", got) + } + if got := dataset["version"]; got != "2" { + t.Errorf("dataset.version = %v, want 2", got) + } + if got := dataset["fileHash"]; got != "def456" { + t.Errorf("dataset.fileHash = %v, want def456", got) + } + if got := dataset["sizeBytes"]; got != int64(1048576) { + t.Errorf("dataset.sizeBytes = %v (%T), want int64(1048576)", got, got) + } + if got := req.Extra["name"]; got != "USDC" { + t.Errorf("dataset merge clobbered existing extra.name: %v", got) + } +} + +func TestMergeDatasetExtras_InitialisesNilExtra(t *testing.T) { + // BuildV2RequirementWithAsset always returns a non-nil Extra, but + // mergeDatasetExtras must still cope with a nil map for callers that + // build PaymentRequirements directly (e.g. tests). + req := x402types.PaymentRequirements{} + rule := &RouteRule{DatasetManifestHash: "abc123"} + + mergeDatasetExtras(&req, rule) + + if req.Extra == nil { + t.Fatal("Extra not initialised") + } + dataset, ok := req.Extra["dataset"].(map[string]interface{}) + if !ok || dataset["manifestHash"] != "abc123" { + t.Errorf("dataset.manifestHash missing: %+v", req.Extra) + } +} + +func TestMergeDatasetExtras_OmitsZeroValuedFields(t *testing.T) { + // Only a manifestHash is set; the empty version/fileHash and zero + // sizeBytes must be omitted so buyers don't see blank keys. 
+ req := x402types.PaymentRequirements{} + rule := &RouteRule{DatasetManifestHash: "abc123"} + + mergeDatasetExtras(&req, rule) + + dataset := req.Extra["dataset"].(map[string]interface{}) + for _, k := range []string{"version", "fileHash", "sizeBytes"} { + if _, ok := dataset[k]; ok { + t.Errorf("dataset.%s should be omitted when unset, got %v", k, dataset[k]) + } + } +} diff --git a/internal/x402/paymentrequired_test.go b/internal/x402/paymentrequired_test.go index 4105c629..d62b751d 100644 --- a/internal/x402/paymentrequired_test.go +++ b/internal/x402/paymentrequired_test.go @@ -287,6 +287,28 @@ func TestHTMLAware_HTTPKeepsLegacyCopy(t *testing.T) { } } +// Dataset offers have no bespoke 402 copy in P1: normalizeOfferType folds +// "dataset" into the "http" render branch, so the page shows the generic +// Pay-with-Obol CTA, not the inference CLI card. Dataset version metadata +// reaches buyers via accepts[].extra.dataset, not the HTML copy. +func TestHTMLAware_DatasetUsesGenericHTTPCopy(t *testing.T) { + d := sampleDisplay() + d.OfferType = "dataset" + + render := NewHTMLAwarePaymentRequired(d) + r := httptest.NewRequest("GET", "/services/pi-sessions", nil) + r.Header.Set("Accept", "text/html") + w := httptest.NewRecorder() + render(w, r, []x402types.PaymentRequirements{sampleRequirement()}, nil) + + body := w.Body.String() + mustContain(t, body, "Pay with your Obol Agent") + mustContain(t, body, "buy-x402 skill") + if strings.Contains(body, "obol buy inference") { + t.Errorf("dataset-type 402 page should NOT show the inference CLI primary card") + } +} + func TestFormatAmount(t *testing.T) { cases := []struct { atomic string diff --git a/internal/x402/serviceoffer_source.go b/internal/x402/serviceoffer_source.go index 2983f5ba..e44a3197 100644 --- a/internal/x402/serviceoffer_source.go +++ b/internal/x402/serviceoffer_source.go @@ -191,6 +191,15 @@ func routeRuleFromOffer(offer *monetizeapi.ServiceOffer, upstreamAuth string) (R rule.Model = 
offer.Spec.Model.Name } + if offer.IsDataset() { + // Normalize the hex digests so buyers can byte-compare the advertised + // values against a freshly computed SHA-256 regardless of operator casing. + rule.DatasetManifestHash = strings.ToLower(offer.Spec.Dataset.ManifestHash) + rule.DatasetVersion = offer.Spec.Dataset.Version + rule.DatasetFileHash = strings.ToLower(offer.Spec.Dataset.FileHash) + rule.DatasetSizeBytes = offer.Spec.Dataset.SizeBytes + } + return rule, nil } @@ -206,6 +215,8 @@ func effectivePrice(offer *monetizeapi.ServiceOffer) (price, priceModel, perMTok return price, "perMTok", offer.Spec.Payment.Price.PerMTok, schemas.ApproxTokensPerRequest, nil case offer.Spec.Payment.Price.PerHour != "": return offer.Spec.Payment.Price.PerHour, "perHour", "", 0, nil + case offer.Spec.Payment.Price.PerMB != "": + return offer.Spec.Payment.Price.PerMB, "perMB", "", 0, nil default: return "0", "", "", 0, nil } diff --git a/internal/x402/serviceoffer_source_test.go b/internal/x402/serviceoffer_source_test.go index 6865fe10..90c28751 100644 --- a/internal/x402/serviceoffer_source_test.go +++ b/internal/x402/serviceoffer_source_test.go @@ -200,6 +200,56 @@ func TestRouteRuleFromOffer_AgentResolutionAdvertisesRuntimeModelSkills(t *testi } } +// TestRouteRuleFromOffer_DatasetAdvertisesDatasetMetadata pins that a +// type=dataset offer plumbs spec.dataset onto the RouteRule (so the verifier +// can surface accepts[].extra.dataset in the 402) and prices via perMB. +// Mirrors the agent-resolution route-rule test. 
+func TestRouteRuleFromOffer_DatasetAdvertisesDatasetMetadata(t *testing.T) { + offer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "pi-sessions", Namespace: "llm"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "dataset", + Dataset: monetizeapi.ServiceOfferDataset{ + ManifestHash: "ABC123", + Version: "2", + FileHash: "DEF456", + SizeBytes: 1048576, + }, + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base-sepolia", + PayTo: "0x1111111111111111111111111111111111111111", + Price: monetizeapi.ServiceOfferPriceTable{PerMB: "0.01"}, + }, + }, + } + + route, err := routeRuleFromOffer(offer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + if route.OfferType != "dataset" { + t.Errorf("OfferType = %q, want dataset", route.OfferType) + } + if route.DatasetManifestHash != "abc123" { + t.Errorf("DatasetManifestHash = %q, want abc123 (lowercased)", route.DatasetManifestHash) + } + if route.DatasetVersion != "2" { + t.Errorf("DatasetVersion = %q, want 2", route.DatasetVersion) + } + if route.DatasetFileHash != "def456" { + t.Errorf("DatasetFileHash = %q, want def456 (lowercased)", route.DatasetFileHash) + } + if route.DatasetSizeBytes != 1048576 { + t.Errorf("DatasetSizeBytes = %d, want 1048576", route.DatasetSizeBytes) + } + if route.Price != "0.01" { + t.Errorf("Price = %q, want 0.01 (from perMB)", route.Price) + } + if route.Pattern != "/services/pi-sessions/*" { + t.Errorf("Pattern = %q, want /services/pi-sessions/*", route.Pattern) + } +} + // TestRouteRuleFromOffer_PlumbsMaxTimeoutSeconds pins the regression where // ServiceOffer.spec.payment.maxTimeoutSeconds was silently dropped on the // floor — the verifier hardcoded 60 in BuildV2RequirementWithAsset and diff --git a/internal/x402/verifier.go b/internal/x402/verifier.go index 29f4451d..9cc54541 100644 --- a/internal/x402/verifier.go +++ b/internal/x402/verifier.go @@ -333,6 +333,7 @@ func (v *Verifier) matchPaidRouteFull(cfg *PricingConfig, uri string) (*RouteRul 
asset := ResolveAssetInfo(chain, rule) requirement := BuildV2RequirementWithAsset(chain, asset, rule.Price, wallet, rule.MaxTimeoutSeconds) mergeAgentExtras(&requirement, rule) + mergeDatasetExtras(&requirement, rule) extensions := WithBazaar(BuildExtensionsForAsset(asset), rule.OfferType, rule.Model) return rule, requirement, extensions, prometheusLabels(rule), chain, asset, true } @@ -397,6 +398,33 @@ func mergeAgentExtras(req *x402types.PaymentRequirements, rule *RouteRule) { } } +// mergeDatasetExtras adds the dataset fields from a RouteRule to the +// requirement's Extra map under "dataset" so buyers probing a 402 see +// exactly which content-addressed dataset version they're paying for. No-op +// for non-dataset rules. +func mergeDatasetExtras(req *x402types.PaymentRequirements, rule *RouteRule) { + if rule.DatasetManifestHash == "" && rule.DatasetVersion == "" && rule.DatasetFileHash == "" && rule.DatasetSizeBytes == 0 { + return + } + if req.Extra == nil { + req.Extra = make(map[string]interface{}) + } + dataset := make(map[string]interface{}) + if rule.DatasetManifestHash != "" { + dataset["manifestHash"] = rule.DatasetManifestHash + } + if rule.DatasetVersion != "" { + dataset["version"] = rule.DatasetVersion + } + if rule.DatasetFileHash != "" { + dataset["fileHash"] = rule.DatasetFileHash + } + if rule.DatasetSizeBytes > 0 { + dataset["sizeBytes"] = rule.DatasetSizeBytes + } + req.Extra["dataset"] = dataset +} + // buildPaymentDisplay turns the matched rule + chain + asset into pre-formatted // strings for the HTML 402 page. 
The atomic-amount input is the value already // computed for the wire requirement (rule.Price * 10^decimals), so passing From 0d8b2d616c2e827f0f85bfedd59ee9417aa05a41 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Sun, 14 Jun 2026 18:40:57 +0400 Subject: [PATCH 02/11] =?UTF-8?q?chore(integration):=20v0.11-rc=20?= =?UTF-8?q?=E2=80=94=20all=20open=20PRs=20>=3D605=20integrated?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashed integration of every >=605 feature PR for combined-stack testing, rebuilt on top of the clean #640 so the history carries no third-party name. Tree-equivalent to merging: - #640 type=dataset CRD/catalog/x402 wiring - #632 skill marketplace; #634 ServiceBounty eval-market; #635 escrow - #633 sell smoke-test agent; #636 flow-12 portability - #638 BYOK provider registry - #639 decentralized auto-research; #641 dataset subsystem (P2-P6) - #637 obol node multinode - #605/#606/#608 MPP credit-card path (import repointed to x402-foundation) Excludes #617 (codex/) and the obol-router (separate fork lineage). Local rc staging branch. Full build + monetize/x402/dataset/embed test surface green. 
--- .env.example | 13 + CLAUDE.md | 12 + Dockerfile.x402-escrow | 10 + README.md | 54 + cmd/obol/bounty.go | 1270 +++++++++++++++++ cmd/obol/bounty_test.go | 315 ++++ cmd/obol/buy.go | 97 +- cmd/obol/buy_test.go | 35 + cmd/obol/dataset.go | 495 +++++++ cmd/obol/main.go | 6 + cmd/obol/model.go | 175 ++- cmd/obol/model_test.go | 46 + cmd/obol/node.go | 102 ++ cmd/obol/research.go | 397 ++++++ cmd/obol/sell.go | 281 +++- cmd/obol/sell_skill.go | 570 ++++++++ cmd/obol/sell_skill_test.go | 330 +++++ cmd/obol/sell_test.go | 103 ++ cmd/obol/skills.go | 442 ++++++ cmd/obol/skills_test.go | 319 +++++ cmd/obol/smoke.go | 140 ++ cmd/obol/smoke_test.go | 225 +++ cmd/x402-escrow/main.go | 144 ++ cmd/x402-escrow/main_test.go | 47 + docs/guides/monetize-dataset.md | 121 ++ docs/guides/skill-marketplace.md | 330 +++++ docs/guides/smoke-test-agent.md | 301 ++++ flows/flow-12-obol-payment.sh | 5 +- flows/flow-19-skill-sale.sh | 389 +++++ flows/flow-20-smoke-agent.sh | 367 +++++ flows/hf-surface-smoke.sh | 201 +++ go.mod | 6 +- go.sum | 13 +- internal/bounty/decay.go | 67 + internal/bounty/decay_test.go | 115 ++ internal/bounty/registry.go | 246 ++++ internal/bounty/registry_test.go | 205 +++ internal/dataset/artifacts.go | 56 + internal/dataset/bundle.go | 83 ++ internal/dataset/client.go | 131 ++ internal/dataset/client_test.go | 115 ++ internal/dataset/coverage_test.go | 184 +++ internal/dataset/entitlement.go | 77 + internal/dataset/entitlement_test.go | 62 + internal/dataset/helpers_test.go | 89 ++ internal/dataset/keyfile.go | 43 + internal/dataset/server.go | 447 ++++++ internal/dataset/server_test.go | 284 ++++ internal/dataset/signer.go | 59 + internal/dataset/store.go | 89 ++ internal/dataset/store_test.go | 68 + internal/dataset/versionlog.go | 244 ++++ internal/dataset/versionlog_test.go | 208 +++ internal/defaults/defaults.go | 1 + internal/defaults/defaults_test.go | 49 + .../bountytasks/benchlocal/report.a2ui.json | 65 + .../bountytasks/benchlocal/report.app.html | 37 
+ .../embed/bountytasks/benchlocal/task.yaml | 94 ++ .../bountytasks/benchmark/report.a2ui.json | 80 ++ .../bountytasks/benchmark/report.app.html | 40 + .../embed/bountytasks/benchmark/task.yaml | 118 ++ .../bountytasks/finetune/report.a2ui.json | 54 + internal/embed/bountytasks/finetune/task.yaml | 83 ++ internal/embed/embed.go | 37 + .../embed/embed_bounty_crd_parity_test.go | 181 +++ internal/embed/embed_bounty_rbac_test.go | 119 ++ internal/embed/embed_crd_test.go | 265 ++++ .../embed/embed_servicebounty_crd_test.go | 103 ++ .../templates/evaluatorenrollment-crd.yaml | 154 ++ .../obol-agent-admission-policy.yaml | 4 +- .../templates/obol-agent-monetize-rbac.yaml | 73 + .../base/templates/servicebounty-crd.yaml | 645 +++++++++ .../base/templates/serviceoffer-crd.yaml | 149 +- .../infrastructure/base/templates/x402.yaml | 187 +++ internal/embed/k3s-config.yaml | 6 +- .../embed/skills/dataset-anonymize/SKILL.md | 63 + .../dataset-anonymize/scripts/anonymize.py | 151 ++ .../embed/skills/finetune-backend/SKILL.md | 57 + .../skills/finetune-backend/scripts/runner.py | 159 +++ internal/embed/skills/monetize-guide/SKILL.md | 38 + .../embed/skills/research-program/SKILL.md | 87 ++ .../skills/research-program/scripts/worker.py | 206 +++ internal/embed/skills/sell/SKILL.md | 261 +++- .../sell/references/serviceoffer-spec.md | 63 +- internal/embed/skills/smoke-test/SKILL.md | 184 +++ .../skills/smoke-test/scripts/gh_post.py | 338 +++++ .../embed/skills/smoke-test/scripts/smoke.py | 523 +++++++ internal/enclave/enclave_darwin.go | 50 +- internal/enclave/enclave_stub.go | 24 +- internal/erc8004/abi.go | 8 +- internal/erc8004/bounty.go | 26 + internal/erc8004/bounty_test.go | 35 + internal/erc8004/calldata.go | 99 ++ internal/erc8004/networks_test.go | 2 +- internal/erc8004/reputation.go | 437 ++++++ internal/erc8004/reputation_registry.abi.json | 391 +++++ internal/erc8004/reputation_test.go | 410 ++++++ internal/erc8004/revert.go | 4 +- internal/erc8004/skill_tags.go | 106 
++ internal/erc8004/skill_tags_test.go | 303 ++++ internal/erc8004/smoke.go | 36 + internal/erc8004/smoke_test.go | 47 + internal/erc8004/validation.go | 401 ++++++ internal/erc8004/validation_registry.abi.json | 272 ++++ internal/erc8004/validation_test.go | 404 ++++++ internal/model/model.go | 244 +++- internal/model/model_test.go | 79 + internal/monetizeapi/evaluatorenrollment.go | 204 +++ internal/monetizeapi/servicebounty.go | 612 ++++++++ internal/monetizeapi/types.go | 196 ++- internal/monetizeapi/zz_generated.deepcopy.go | 299 +++- .../openclaw/monetize_integration_test.go | 57 +- internal/research/groupauth/groupauth.go | 323 +++++ internal/research/groupauth/groupauth_test.go | 98 ++ internal/research/kb/kb.go | 288 ++++ internal/research/kb/kb_test.go | 129 ++ internal/research/server/server.go | 244 ++++ internal/research/server/server_test.go | 118 ++ internal/schemas/payment.go | 54 +- internal/serviceoffercontroller/bounty.go | 565 ++++++++ .../bounty_escalation.go | 300 ++++ .../bounty_escalation_test.go | 785 ++++++++++ .../serviceoffercontroller/bounty_eval.go | 634 ++++++++ .../bounty_eval_test.go | 342 +++++ .../bounty_grounding.go | 122 ++ .../bounty_lifecycle_test.go | 540 +++++++ .../serviceoffercontroller/bounty_panel.go | 465 ++++++ .../bounty_panel_test.go | 562 ++++++++ .../bounty_structure_test.go | 36 + internal/serviceoffercontroller/controller.go | 97 +- internal/serviceoffercontroller/seed.go | 234 +++ internal/serviceoffercontroller/seed_test.go | 214 +++ internal/serviceoffercontroller/skill.go | 120 ++ .../serviceoffercontroller/skill_render.go | 226 +++ .../skill_render_test.go | 233 +++ internal/serviceoffercontroller/skill_test.go | 352 +++++ internal/skillpkg/bundle.go | 241 ++++ internal/skillpkg/bundle_test.go | 396 +++++ internal/stack/backend_k3s.go | 3 + internal/stack/backend_k3s_init_test.go | 48 + internal/stack/node.go | 128 ++ internal/stack/node_test.go | 111 ++ internal/stack/safety_test.go | 4 +- 
internal/stack/stack.go | 1 + internal/testutil/anvil.go | 31 + internal/testutil/facilitator_real.go | 64 +- internal/x402/card.go | 486 +++++++ internal/x402/card_test.go | 389 +++++ internal/x402/config.go | 41 + internal/x402/escrow/gateway.go | 264 ++++ internal/x402/escrow/gateway_test.go | 105 ++ internal/x402/escrow/permit2.go | 280 ++++ internal/x402/escrow/permit2_test.go | 325 +++++ internal/x402/escrow/server.go | 382 +++++ internal/x402/escrow/server_test.go | 416 ++++++ internal/x402/escrow/settle.go | 342 +++++ internal/x402/escrow/settle_test.go | 184 +++ internal/x402/escrow/store.go | 109 ++ internal/x402/serviceoffer_source.go | 34 + .../x402/serviceoffer_source_skill_test.go | 119 ++ internal/x402/serviceoffer_source_test.go | 62 + internal/x402/skill_extras_test.go | 138 ++ internal/x402/verifier.go | 36 + internal/x402/verifier_test.go | 2 +- internal/x402mcp/bountyreport.go | 205 +++ internal/x402mcp/bountyreport_test.go | 146 ++ internal/x402mcp/server.go | 11 + justfile | 2 + plans/bounty-ane-marketplace-design.md | 765 ++++++++++ plans/dataset-subscription-v1.1-pitch.md | 110 ++ plans/evaluator-market-research-notes.md | 111 ++ plans/servicebounty-technical-spec.md | 349 +++++ tests/test_gh_post_no_redirect.py | 157 ++ 173 files changed, 33053 insertions(+), 278 deletions(-) create mode 100644 Dockerfile.x402-escrow create mode 100644 cmd/obol/bounty.go create mode 100644 cmd/obol/bounty_test.go create mode 100644 cmd/obol/dataset.go create mode 100644 cmd/obol/node.go create mode 100644 cmd/obol/research.go create mode 100644 cmd/obol/sell_skill.go create mode 100644 cmd/obol/sell_skill_test.go create mode 100644 cmd/obol/skills.go create mode 100644 cmd/obol/skills_test.go create mode 100644 cmd/obol/smoke.go create mode 100644 cmd/obol/smoke_test.go create mode 100644 cmd/x402-escrow/main.go create mode 100644 cmd/x402-escrow/main_test.go create mode 100644 docs/guides/monetize-dataset.md create mode 100644 
docs/guides/skill-marketplace.md create mode 100644 docs/guides/smoke-test-agent.md create mode 100755 flows/flow-19-skill-sale.sh create mode 100755 flows/flow-20-smoke-agent.sh create mode 100755 flows/hf-surface-smoke.sh create mode 100644 internal/bounty/decay.go create mode 100644 internal/bounty/decay_test.go create mode 100644 internal/bounty/registry.go create mode 100644 internal/bounty/registry_test.go create mode 100644 internal/dataset/artifacts.go create mode 100644 internal/dataset/bundle.go create mode 100644 internal/dataset/client.go create mode 100644 internal/dataset/client_test.go create mode 100644 internal/dataset/coverage_test.go create mode 100644 internal/dataset/entitlement.go create mode 100644 internal/dataset/entitlement_test.go create mode 100644 internal/dataset/helpers_test.go create mode 100644 internal/dataset/keyfile.go create mode 100644 internal/dataset/server.go create mode 100644 internal/dataset/server_test.go create mode 100644 internal/dataset/signer.go create mode 100644 internal/dataset/store.go create mode 100644 internal/dataset/store_test.go create mode 100644 internal/dataset/versionlog.go create mode 100644 internal/dataset/versionlog_test.go create mode 100644 internal/embed/bountytasks/benchlocal/report.a2ui.json create mode 100644 internal/embed/bountytasks/benchlocal/report.app.html create mode 100644 internal/embed/bountytasks/benchlocal/task.yaml create mode 100644 internal/embed/bountytasks/benchmark/report.a2ui.json create mode 100644 internal/embed/bountytasks/benchmark/report.app.html create mode 100644 internal/embed/bountytasks/benchmark/task.yaml create mode 100644 internal/embed/bountytasks/finetune/report.a2ui.json create mode 100644 internal/embed/bountytasks/finetune/task.yaml create mode 100644 internal/embed/embed_bounty_crd_parity_test.go create mode 100644 internal/embed/embed_bounty_rbac_test.go create mode 100644 internal/embed/embed_servicebounty_crd_test.go create mode 100644 
internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml create mode 100644 internal/embed/infrastructure/base/templates/servicebounty-crd.yaml create mode 100644 internal/embed/skills/dataset-anonymize/SKILL.md create mode 100644 internal/embed/skills/dataset-anonymize/scripts/anonymize.py create mode 100644 internal/embed/skills/finetune-backend/SKILL.md create mode 100644 internal/embed/skills/finetune-backend/scripts/runner.py create mode 100644 internal/embed/skills/research-program/SKILL.md create mode 100644 internal/embed/skills/research-program/scripts/worker.py create mode 100644 internal/embed/skills/smoke-test/SKILL.md create mode 100644 internal/embed/skills/smoke-test/scripts/gh_post.py create mode 100644 internal/embed/skills/smoke-test/scripts/smoke.py create mode 100644 internal/erc8004/bounty.go create mode 100644 internal/erc8004/bounty_test.go create mode 100644 internal/erc8004/calldata.go create mode 100644 internal/erc8004/reputation.go create mode 100644 internal/erc8004/reputation_registry.abi.json create mode 100644 internal/erc8004/reputation_test.go create mode 100644 internal/erc8004/skill_tags.go create mode 100644 internal/erc8004/skill_tags_test.go create mode 100644 internal/erc8004/smoke.go create mode 100644 internal/erc8004/smoke_test.go create mode 100644 internal/erc8004/validation.go create mode 100644 internal/erc8004/validation_registry.abi.json create mode 100644 internal/erc8004/validation_test.go create mode 100644 internal/monetizeapi/evaluatorenrollment.go create mode 100644 internal/monetizeapi/servicebounty.go create mode 100644 internal/research/groupauth/groupauth.go create mode 100644 internal/research/groupauth/groupauth_test.go create mode 100644 internal/research/kb/kb.go create mode 100644 internal/research/kb/kb_test.go create mode 100644 internal/research/server/server.go create mode 100644 internal/research/server/server_test.go create mode 100644 internal/serviceoffercontroller/bounty.go 
create mode 100644 internal/serviceoffercontroller/bounty_escalation.go create mode 100644 internal/serviceoffercontroller/bounty_escalation_test.go create mode 100644 internal/serviceoffercontroller/bounty_eval.go create mode 100644 internal/serviceoffercontroller/bounty_eval_test.go create mode 100644 internal/serviceoffercontroller/bounty_grounding.go create mode 100644 internal/serviceoffercontroller/bounty_lifecycle_test.go create mode 100644 internal/serviceoffercontroller/bounty_panel.go create mode 100644 internal/serviceoffercontroller/bounty_panel_test.go create mode 100644 internal/serviceoffercontroller/bounty_structure_test.go create mode 100644 internal/serviceoffercontroller/seed.go create mode 100644 internal/serviceoffercontroller/seed_test.go create mode 100644 internal/serviceoffercontroller/skill.go create mode 100644 internal/serviceoffercontroller/skill_render.go create mode 100644 internal/serviceoffercontroller/skill_render_test.go create mode 100644 internal/serviceoffercontroller/skill_test.go create mode 100644 internal/skillpkg/bundle.go create mode 100644 internal/skillpkg/bundle_test.go create mode 100644 internal/stack/backend_k3s_init_test.go create mode 100644 internal/stack/node.go create mode 100644 internal/stack/node_test.go create mode 100644 internal/x402/card.go create mode 100644 internal/x402/card_test.go create mode 100644 internal/x402/escrow/gateway.go create mode 100644 internal/x402/escrow/gateway_test.go create mode 100644 internal/x402/escrow/permit2.go create mode 100644 internal/x402/escrow/permit2_test.go create mode 100644 internal/x402/escrow/server.go create mode 100644 internal/x402/escrow/server_test.go create mode 100644 internal/x402/escrow/settle.go create mode 100644 internal/x402/escrow/settle_test.go create mode 100644 internal/x402/escrow/store.go create mode 100644 internal/x402/serviceoffer_source_skill_test.go create mode 100644 internal/x402/skill_extras_test.go create mode 100644 
internal/x402mcp/bountyreport.go create mode 100644 internal/x402mcp/bountyreport_test.go create mode 100644 plans/bounty-ane-marketplace-design.md create mode 100644 plans/dataset-subscription-v1.1-pitch.md create mode 100644 plans/evaluator-market-research-notes.md create mode 100644 plans/servicebounty-technical-spec.md create mode 100644 tests/test_gh_post_no_redirect.py diff --git a/.env.example b/.env.example index 475807d6..11ae4a08 100644 --- a/.env.example +++ b/.env.example @@ -11,3 +11,16 @@ ANTHROPIC_API_KEY= # Required for TestIntegration_OpenAIInference OPENAI_API_KEY= + +# ── MPP credit-card payments (Stripe) ────────────────────────────────────── +# Seller-side credit-card settlement via the Machine Payments Protocol (MPP). +# Requires a Stripe account with "Machine payments" enabled. See the +# "Credit-card payments (MPP)" section of README.md. +# +# Consumed by the x402-verifier (sourced from the x402-secrets Secret in the +# `x402` namespace) to authorize/capture Stripe PaymentIntents for card offers. +STRIPE_SECRET_KEY= +# Your Stripe "machine payments" network id, advertised in the 402 challenge so +# card clients can mint a Shared Payment Token. Default for +# `obol sell http --pay-with card --stripe-network-id`. +STRIPE_NETWORK_ID= diff --git a/CLAUDE.md b/CLAUDE.md index b52da276..1b279e77 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -262,6 +262,18 @@ Caveats: **Auto-configuration**: `obol stack up` → `autoConfigureLLM()` detects host Ollama models, patches LiteLLM config. `obolup.sh` → `check_agent_model_api_key()` reads `~/.openclaw/openclaw.json`, resolves API key from `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` (Anthropic) or `OPENAI_API_KEY` (OpenAI), exports for downstream. 
+**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`SignupURL`/`Free`); adding a provider is one row, no per-provider switch. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET /v1/models` (TTY picker / non-TTY error naming real ids). `--free` seeds only the curated free-tier model snapshot (OpenRouter). + +Two front doors share one engine (`setupCloudProvider` in `cmd/obol/model.go`): +- `obol buy inference ` — friendly onboarding: opens the provider's `SignupURL` in the browser (`openBrowser`, hermes-style), takes the key (`--api-key` → env var → prompt), wires LiteLLM + syncs agents. `obol buy inference` with a URL/no-arg is still the **x402 crypto-paid seller** path — dispatch keys on whether the positional arg matches a registry provider id. +- `obol model setup --api-key ` — the scriptable, no-browser equivalent. Unlisted endpoints still use `obol model setup custom`. + +```bash +obol buy inference venice # opens venice key page, prompts, wires up +obol buy inference openrouter --free # seeds curated free models +obol model setup venice --api-key $VENICE_API_KEY # scriptable / CI +``` + **External OpenAI-compatible LLM** (vLLM / sglang / mlx-lm / remote GPU) — canonical user flow, no ConfigMap surgery: ```bash diff --git a/Dockerfile.x402-escrow b/Dockerfile.x402-escrow new file mode 100644 index 00000000..df464973 --- /dev/null +++ b/Dockerfile.x402-escrow @@ -0,0 +1,10 @@ +FROM golang:1.25-alpine AS builder +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . 
+RUN CGO_ENABLED=0 go build -o /x402-escrow ./cmd/x402-escrow + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=builder /x402-escrow /x402-escrow +ENTRYPOINT ["/x402-escrow"] diff --git a/README.md b/README.md index 818f9f3f..2420aae0 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,60 @@ obol openclaw skills remove # remove via openclaw CLI in pod Skills are delivered via host-path PVC injection — no ConfigMap size limits, works before pod readiness, and survives pod restarts. +## Credit-card payments (MPP) + +Alongside the default x402 on-chain (stablecoin) payment path, sellers can accept +**credit-card** payments via the [Machine Payments Protocol](https://mpp.dev) (MPP, +the Stripe + Tempo HTTP-402 standard). A card offer is gated on the same +`/services//*` route as a crypto offer — the payment method is selected per +offer. + +```bash +# Expose an upstream as a card-paid endpoint (Stripe stripe.charge). +obol sell http my-api \ + --pay-with card \ + --stripe-account acct_1A2b3C4d \ # Stripe destination account (card analog of --pay-to) + --stripe-network-id stripenet_...\ # Stripe "machine payments" network id (or STRIPE_NETWORK_ID) + --card-currency usd \ + --upstream my-svc --port 8080 --price 0.01 +``` + +How it works: + +- The offer advertises a `card` option in its `402` challenge (amount in the + currency's **minor units** — cents for `usd`, whole yen for `jpy`, etc.). +- A card-capable buyer presents a Stripe **Shared Payment Token** (`spt_…`) in the + `X-PAYMENT` header. +- The verifier **authorizes** a manual-capture Stripe PaymentIntent before serving, + proxies to the upstream, then **captures** only after a successful (`<400`) + response — a failed upstream **cancels** the hold, so a buyer is never charged for + nothing. Each SPT is single-use (replay-guarded). + +### Requirements & configuration + +- A **Stripe account with "Machine payments" enabled** (a gated Stripe feature). 
+- `STRIPE_SECRET_KEY` — used by the `x402-verifier` to authorize/capture + PaymentIntents. It is read from the `x402-secrets` Secret in the `x402` namespace; + populate it before taking card payments: + + ```bash + kubectl -n x402 patch secret x402-secrets --type merge \ + -p '{"stringData":{"STRIPE_SECRET_KEY":"sk_live_..."}}' + kubectl -n x402 rollout restart deploy/x402-verifier + ``` + +- `STRIPE_NETWORK_ID` — your Stripe "machine payments" network id, advertised in the + 402 challenge so clients can mint an SPT. It is a host/CLI value (default for + `--stripe-network-id`); add both to your `.env` from `.env.example`. + +> **Note on scope.** Card offers are not ERC-8004 registered (no on-chain identity). +> The Stripe key is currently a single cluster-wide value in `x402-secrets`; a +> per-offer/per-namespace Secret is the production direction but is gated on widening +> the verifier's deliberately `resourceName`-scoped Secret RBAC. The SPT replay guard +> is per-pod (the verifier runs single-replica). The SPT is passed as the top-level +> Stripe form field `shared_payment_granted_token` per the `cp0x-org/mppx` reference — +> validate against your live Stripe account before relying on it in production. 
+ ## Public Access (Cloudflare Tunnel) Expose your stack to the internet via Cloudflare Tunnel: diff --git a/cmd/obol/bounty.go b/cmd/obol/bounty.go new file mode 100644 index 00000000..6cdf556a --- /dev/null +++ b/cmd/obol/bounty.go @@ -0,0 +1,1270 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "math/big" + "slices" + "strconv" + "strings" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/kubectl" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/ui" + x402verifier "github.com/ObolNetwork/obol-stack/internal/x402" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + ethcrypto "github.com/ethereum/go-ethereum/crypto" + "github.com/urfave/cli/v3" +) + +// Voucher ferry annotations — must match the serviceoffer-controller's +// bounty_eval.go constants exactly (the CLI writes, the controller reads; the +// controller never signs and escrow endpoint/credentials never ride in here). +const ( + bountyRewardVoucherAnnotation = "obol.org/reward-voucher" + bountyBondVoucherAnnotation = "obol.org/bond-voucher" + bountyEvalVoucherAnnotation = "obol.org/eval-voucher" + bountyEvalVoucherR1Annotation = "obol.org/eval-voucher-r1" +) + +// bountyCommand is the demand-side counterpart to `obol sell`: post a +// ServiceBounty (escrowed reward for work) instead of a ServiceOffer. Task +// types are discovered dynamically from the embedded catalog — exactly like +// `obol network install ` builds a subcommand per embedded network — so +// `obol bounty post` lists only the types live in this release. 
+func bountyCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "bounty", + Usage: "Post and manage ServiceBounties (demand-side: pay for benchmarks, fine-tunes, serving)", + Commands: []*cli.Command{ + { + Name: "post", + Usage: "Post a bounty for a task type (run `obol bounty post` to list the available types)", + Commands: buildBountyPostCommands(cfg), + Action: func(ctx context.Context, cmd *cli.Command) error { + return cli.ShowSubcommandHelp(cmd) + }, + }, + bountyTypesCommand(cfg), + bountyListCommand(cfg), + bountyStatusCommand(cfg), + bountyFundCommand(cfg), + bountyClaimCommand(cfg), + bountySubmitCommand(cfg), + bountyFeedbackCommand(cfg), + bountyVerdictCommand(cfg, "accept", "Accept the submission (poster verdict; releases the escrowed reward)"), + bountyVerdictCommand(cfg, "reject", "Reject the submission (poster verdict; escrow stays held until deadline refund)"), + bountyEvalCommand(cfg), + }, + } +} + +// bountyEvalCommand carries the evaluator-side commit-reveal verbs. Commitments +// are address-bound (hash includes the evaluator address) and the controller +// opens the reveal window only after K commitments are in — committing first +// and revealing later is the protocol, not a convenience. 
+func bountyEvalCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "eval", + Usage: "Evaluator verbs: enroll in the pool, commit and reveal quorum scores", + Commands: []*cli.Command{ + { + Name: "enroll", + Usage: "Enroll as an evaluator (joins the selection pool at the Shadow tier)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator payout/identity address (0x...)", Required: true}, + &cli.StringFlag{Name: "task-types", Usage: "Comma-separated task-type refs you can re-run", Value: "benchmark@v1"}, + &cli.StringFlag{Name: "attestation-scheme", Usage: "Device attestation scheme [none|secure-enclave]", Value: "none"}, + &cli.BoolFlag{Name: "dry-run", Usage: "Print the EvaluatorEnrollment manifest instead of applying it"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing enrollment name: obol bounty eval enroll --address 0x...") + } + enrollment := monetizeapi.EvaluatorEnrollment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.EvaluatorEnrollmentKind, + }, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: cmd.String("namespace")}, + Spec: monetizeapi.EvaluatorEnrollmentSpec{ + Address: cmd.String("address"), + TaskTypes: strings.Split(cmd.String("task-types"), ","), + Attestation: monetizeapi.EvaluatorAttestation{Scheme: cmd.String("attestation-scheme")}, + }, + } + if cmd.Bool("dry-run") { + out, err := json.MarshalIndent(enrollment, "", " ") + if err != nil { + return err + } + fmt.Printf("# EvaluatorEnrollment (dry-run)\n%s\n", out) + return nil + } + out, err := kubectlApplyOutput(cfg, enrollment) + if err != nil { + return fmt.Errorf("apply EvaluatorEnrollment: %w", err) + } + fmt.Print(out) + fmt.Println("Enrolled at 
the Shadow tier: you'll be randomly assigned shadow seats; agreements with the quorum median climb the ladder.") + return nil + }, + }, + { + Name: "pool", + Usage: "List the enrolled evaluator pool", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace (default: all namespaces)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + bin, kc := kubectl.Paths(cfg) + args := []string{"get", "evaluatorenrollments.obol.org", "-o", "wide"} + if ns := cmd.String("namespace"); ns != "" { + args = append(args, "-n", ns) + } else { + args = append(args, "-A") + } + out, err := kubectl.Output(bin, kc, args...) + if err != nil { + return err + } + fmt.Print(out) + return nil + }, + }, + { + Name: "commit", + Usage: "Commit your score (only the address-bound hash is published; keep the salt for reveal)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator address (0x...)", Required: true}, + &cli.IntFlag{Name: "score", Usage: "[REQUIRED] Verdict score 0-100 (>=50 verifies)", Required: true}, + &cli.StringFlag{Name: "salt", Usage: "[REQUIRED] Random salt — KEEP IT; the reveal is unverifiable without it", Required: true}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty eval commit --address 0x... 
--score N --salt s") + } + score := int64(cmd.Int("score")) + if score < 0 || score > 100 { + return fmt.Errorf("--score %d out of range 0-100", score) + } + addr := strings.ToLower(cmd.String("address")) + hash := monetizeapi.EvalCommitHash(score, cmd.String("salt"), addr) + fmt.Printf("Committing %s (score and salt stay local — reveal with the SAME --score and --salt)\n", hash) + return annotateBountyCLI(cfg, cmd.String("namespace"), name, + []string{"obol.org/eval-commit-" + addr + "=" + hash}) + }, + }, + { + Name: "reveal", + Usage: "Reveal your committed score (accepted once K commitments are in)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Evaluator address (0x...)", Required: true}, + &cli.IntFlag{Name: "score", Usage: "[REQUIRED] The committed score", Required: true}, + &cli.StringFlag{Name: "salt", Usage: "[REQUIRED] The committed salt", Required: true}, + &cli.StringFlag{Name: "validation-tx", Usage: "Optional ERC-8004 validationResponse tx hash you submitted on-chain (recorded as provenance)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty eval reveal --address 0x... 
--score N --salt s") + } + payload := map[string]any{ + "score": int64(cmd.Int("score")), + "salt": cmd.String("salt"), + } + if tx := cmd.String("validation-tx"); tx != "" { + payload["validationTx"] = tx + } + raw, err := json.Marshal(payload) + if err != nil { + return err + } + addr := strings.ToLower(cmd.String("address")) + return annotateBountyCLI(cfg, cmd.String("namespace"), name, + []string{"obol.org/eval-reveal-" + addr + "=" + string(raw)}) + }, + }, + { + Name: "calldata", + Usage: "Print ERC-8004 validationResponse calldata for your wallet to submit (the controller NEVER signs)", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace (with --bounty)", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "network", Usage: "Chain", Value: "base-sepolia"}, + &cli.StringFlag{Name: "bounty", Usage: "Bounty name — derives the request hash from the bounty UID + --address"}, + &cli.StringFlag{Name: "address", Usage: "Your evaluator address (0x...; required with --bounty)"}, + &cli.StringFlag{Name: "request-hash", Usage: "Explicit validation request hash (bytes32, 0x...) — overrides --bounty derivation"}, + &cli.IntFlag{Name: "response", Usage: "[REQUIRED] Your 0-100 verdict score", Required: true}, + &cli.StringFlag{Name: "response-uri", Usage: "Optional URI of your evaluation report"}, + &cli.StringFlag{Name: "tag", Usage: "Optional tag (e.g. 
the task type ref)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + response := cmd.Int("response") + if response < 0 || response > 100 { + return fmt.Errorf("--response %d out of range 0-100", response) + } + requestHash, err := resolveEvalRequestHash(cfg, cmd) + if err != nil { + return err + } + registry, err := erc8004.ValidationRegistryAddress(cmd.String("network")) + if err != nil { + return err + } + calldata, err := erc8004.EncodeValidationResponse( + requestHash, + uint8(response), + cmd.String("response-uri"), + common.Hash{}, + cmd.String("tag"), + ) + if err != nil { + return err + } + fmt.Printf("Request hash: %s\n", requestHash.Hex()) + fmt.Printf("ValidationRegistry (%s): %s\n", cmd.String("network"), registry) + fmt.Printf("Calldata: 0x%x\n", calldata) + fmt.Println("Submit with YOUR wallet (e.g. the agent remote-signer or cast send) — then pass the tx hash to `obol bounty eval reveal --validation-tx`.") + return nil + }, + }, + bountyEvalFundCommand(cfg), + }, + } +} + +// resolveEvalRequestHash returns the explicit --request-hash override, or +// derives the hash from the named bounty's UID + the evaluator --address via +// erc8004.BountyEvalRequestHash (the controller grounds reveals against the +// exact same derivation). +func resolveEvalRequestHash(cfg *config.Config, cmd *cli.Command) (common.Hash, error) { + if raw := strings.TrimSpace(cmd.String("request-hash")); raw != "" { + return common.HexToHash(raw), nil + } + name := strings.TrimSpace(cmd.String("bounty")) + address := strings.TrimSpace(cmd.String("address")) + if name == "" || address == "" { + return common.Hash{}, fmt.Errorf("pass --request-hash 0x..., or --bounty with --address 0x... 
to derive it from the bounty UID") + } + if !common.IsHexAddress(address) { + return common.Hash{}, fmt.Errorf("--address %q is not a 0x address", address) + } + sb, err := getBountyCLI(cfg, cmd.String("namespace"), name) + if err != nil { + return common.Hash{}, err + } + if sb.UID == "" { + return common.Hash{}, fmt.Errorf("bounty %s has no UID — cannot derive the request hash", name) + } + return erc8004.BountyEvalRequestHash(string(sb.UID), address), nil +} + +// bountyTypesCommand lists the enabled task-type catalog with its eval/pricing +// policy, so an operator can see what bounties are postable and on what terms. +func bountyTypesCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "types", + Usage: "List the available ServiceBounty task types (the dynamic catalog)", + Action: func(ctx context.Context, cmd *cli.Command) error { + types, err := bounty.Enabled() + if err != nil { + return err + } + if len(types) == 0 { + fmt.Println("No bounty task types are enabled in this release.") + return nil + } + for _, t := range types { + fmt.Printf("• %-14s %s\n", t.Ref(), t.Summary) + fmt.Printf(" runner=%s acceptance=%s eval-k=%d paid-in=%s/%s hardware-proof=%s\n", + t.Runner, t.Acceptance.Method, t.Eval.DefaultK, + t.Eval.Payment.Asset, t.Eval.Payment.Settle, t.HardwareProof) + } + return nil + }, + } +} + +// commonBountyFlags are shared by every `obol bounty post ` subcommand. +// The bounty name is positional (`obol bounty post benchmark `), matching +// `obol sell http `. +func commonBountyFlags() []cli.Flag { + return []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace for the ServiceBounty", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "model", Usage: "Target model id (spec.task.targetModel.name)"}, + &cli.StringFlag{Name: "runtime", Usage: "Target model runtime", Value: "vllm"}, + &cli.StringFlag{Name: "reward", Usage: "[REQUIRED] Reward amount in human units (e.g. 
500.00)", Required: true}, + &cli.StringFlag{Name: "asset", Usage: "Reward asset symbol", Value: "USDC"}, + &cli.StringFlag{Name: "chain", Usage: "Payment network", Value: "base"}, + &cli.StringFlag{Name: "pay-to", Usage: "Escrow-return / poster address (0x...)"}, + &cli.StringFlag{Name: "escrow-scheme", Usage: "x402 escrow scheme [upto|authCapture]", Value: "upto"}, + &cli.StringFlag{Name: "facilitator", Usage: "x402 facilitator URL", Value: "https://x402.gcp.obol.tech"}, + &cli.StringFlag{Name: "deadline", Usage: "RFC3339 deadline (e.g. 2026-07-01T00:00:00Z)"}, + &cli.IntFlag{Name: "max-fulfillers", Usage: "Max paid fulfillers (1 = single-winner)", Value: 1}, + &cli.IntFlag{Name: "eval-k", Usage: "Evaluators to sample (defaults to the task type's defaultK)"}, + &cli.BoolFlag{Name: "dangerously-skip-verification", Usage: "Skip the evaluator quorum: poster-as-judge, bounty marked unverified, no reputation feedback emitted"}, + &cli.StringFlag{Name: "hardware-proof", Usage: "Hardware proof strength [self-report|gpu-attestation|evaluator-measured] (defaults to the task type's policy)"}, + &cli.StringFlag{Name: "tolerance", Usage: "Per-metric acceptance bands, metric=band pairs (e.g. totalScore=0.05,mmlu=0.01); overlays the task type's defaults"}, + &cli.StringFlag{Name: "dataset-commit", Usage: "Merkle root committing the (partially private) eval dataset"}, + &cli.StringFlag{Name: "private-fraction", Usage: "Fraction of dataset rows kept private, 0..1 (e.g. 0.2); revealed only to sampled evaluators"}, + &cli.StringFlag{Name: "bond", Usage: "Optional refundable self-bond amount (own funds; never slashed)"}, + &cli.BoolFlag{Name: "yes", Aliases: []string{"y"}, Usage: "Skip the cost-preview confirmation"}, + &cli.BoolFlag{Name: "dry-run", Usage: "Print the ServiceBounty manifest instead of applying it"}, + } +} + +// buildBountyPostCommands creates one `post` subcommand per ENABLED task type, +// with flags generated from that type's param schema. 
+func buildBountyPostCommands(cfg *config.Config) []*cli.Command { + types, err := bounty.Enabled() + if err != nil { + return nil + } + + var commands []*cli.Command + for _, t := range types { + flags := commonBountyFlags() + for _, p := range t.Params { + usage := p.Description + if usage == "" { + usage = "Set " + p.Name + } + if len(p.Enum) > 0 { + usage += fmt.Sprintf(" [options: %s]", strings.Join(p.Enum, ", ")) + } + required := p.Required && p.Default == "" + if required { + usage = "[REQUIRED] " + usage + } + flags = append(flags, &cli.StringFlag{ + Name: paramFlagName(p.Name), + Usage: usage, + Value: p.Default, + Required: required, + }) + } + + tt := t // capture for the closure + commands = append(commands, &cli.Command{ + Name: tt.ID, + Usage: tt.Summary, + ArgsUsage: "", + Flags: flags, + Action: func(ctx context.Context, cmd *cli.Command) error { + return postBounty(cfg, ui.New(false), cmd, tt) + }, + }) + } + + return commands +} + +// paramFlagName converts a task-package param name to the CLI's kebab-case +// flag convention, e.g. hardwareClass -> hardware-class (the same mapping +// network.fieldNameToFlagName applies to template fields). +func paramFlagName(param string) string { + var b strings.Builder + for i, r := range param { + if i > 0 && r >= 'A' && r <= 'Z' { + b.WriteRune('-') + } + b.WriteRune(r) + } + + return strings.ToLower(b.String()) +} + +// postBounty builds a ServiceBounty CR from the flags + task-type defaults, +// shows the two-leg cost preview (reward escrow + OBOL eval bill), confirms in +// a TTY, and applies the manifest. +func postBounty(cfg *config.Config, u *ui.UI, cmd *cli.Command, t bounty.TaskType) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty post %s [flags]", t.ID) + } + + // Collect + validate the type's params against its schema. Flags are the + // kebab-case form of the param name; the CR keeps the package's name. 
+ params := make(map[string]string) + for _, p := range t.Params { + flag := paramFlagName(p.Name) + v := cmd.String(flag) + if v == "" { + v = p.Default + } + if p.Required && v == "" { + return fmt.Errorf("--%s is required for task type %s", flag, t.Ref()) + } + if len(p.Enum) > 0 && v != "" && !slices.Contains(p.Enum, v) { + return fmt.Errorf("--%s=%q is not one of [%s]", flag, v, strings.Join(p.Enum, ", ")) + } + if v != "" { + params[p.Name] = v + } + } + + evalK := int64(cmd.Int("eval-k")) + if evalK == 0 { + evalK = int64(t.Eval.DefaultK) + } + + evalMode := monetizeapi.EvalModeRequired + if cmd.Bool("dangerously-skip-verification") { + evalMode = monetizeapi.EvalModeDangerouslySkipped + } + + hardwareProof := cmd.String("hardware-proof") + if hardwareProof == "" { + hardwareProof = t.HardwareProof + } + switch hardwareProof { + case "", "self-report", "gpu-attestation", "evaluator-measured": + default: + return fmt.Errorf("--hardware-proof=%q is not one of [self-report, gpu-attestation, evaluator-measured]", hardwareProof) + } + + // Tolerance: the task type's bands, overlaid by --tolerance metric=band + // pairs (BenchLocal-style packs have their own metric keys). + tolerance := make(map[string]string, len(t.Acceptance.Tolerance)) + for k, v := range t.Acceptance.Tolerance { + tolerance[k] = v + } + if raw := cmd.String("tolerance"); raw != "" { + for _, pair := range strings.Split(raw, ",") { + metric, band, ok := strings.Cut(strings.TrimSpace(pair), "=") + if !ok || metric == "" || band == "" { + return fmt.Errorf("--tolerance entry %q is not metric=band", pair) + } + tolerance[metric] = band + } + } + + var deadline *metav1.Time + if d := cmd.String("deadline"); d != "" { + parsed, err := time.Parse(time.RFC3339, d) + if err != nil { + return fmt.Errorf("--deadline %q is not RFC3339 (e.g. 
2026-07-01T00:00:00Z): %w", d, err) + } + deadline = &metav1.Time{Time: parsed} + } + + sb := monetizeapi.ServiceBounty{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.ServiceBountyKind, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: cmd.String("namespace"), + }, + Spec: monetizeapi.ServiceBountySpec{ + Task: monetizeapi.ServiceBountyTask{ + TypeRef: t.Ref(), + Params: params, + TargetModel: monetizeapi.ServiceOfferModel{Name: cmd.String("model"), Runtime: cmd.String("runtime")}, + HardwareProof: hardwareProof, + DatasetCommit: monetizeapi.ServiceBountyDatasetCommit{ + Root: cmd.String("dataset-commit"), + PrivateFraction: cmd.String("private-fraction"), + }, + }, + Acceptance: monetizeapi.ServiceBountyAcceptance{ + Method: t.Acceptance.Method, + Tolerance: tolerance, + CommitReveal: t.Acceptance.CommitReveal, + }, + Reward: monetizeapi.ServiceBountyReward{ + Network: cmd.String("chain"), + PayTo: cmd.String("pay-to"), + Asset: monetizeapi.ServiceOfferAsset{Symbol: cmd.String("asset")}, + Amount: cmd.String("reward"), + Escrow: monetizeapi.ServiceBountyEscrow{ + Scheme: cmd.String("escrow-scheme"), + Facilitator: cmd.String("facilitator"), + Mode: "auto", + }, + }, + Eval: monetizeapi.ServiceBountyEval{ + K: evalK, + Mode: evalMode, + Selection: t.Eval.Selection, + Payment: monetizeapi.ServiceBountyEvalPayment{ + Asset: t.Eval.Payment.Asset, + PerEvaluator: t.Eval.Payment.PerEvaluator, + FundedBy: t.Eval.Payment.FundedBy, + Settle: t.Eval.Payment.Settle, + }, + }, + Trust: monetizeapi.ServiceBountyTrust{ReputationGate: true}, + Deadline: deadline, + MaxFulfillers: int64(cmd.Int("max-fulfillers")), + }, + } + + if bond := cmd.String("bond"); bond != "" { + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: bond, Token: cmd.String("asset")} + } + + if cmd.Bool("dry-run") { + out, err := json.MarshalIndent(sb, "", " ") + if err != nil { + return err + } + 
fmt.Printf("# ServiceBounty (dry-run)\n%s\n", out) + return nil + } + + printBountyCostPreview(u, &sb, t) + if !cmd.Bool("yes") && !u.Confirm("Proceed?", true) { + return fmt.Errorf("aborted") + } + + applyOut, err := kubectlApplyOutput(cfg, sb) + if err != nil { + return fmt.Errorf("apply ServiceBounty: %w", err) + } + fmt.Print(applyOut) + fmt.Printf("\nBounty posted. Check status: obol bounty status %s -n %s\n", name, sb.Namespace) + return nil +} + +// printBountyCostPreview shows the poster's full commitment before apply: the +// escrowed reward leg AND the OBOL eval bill (k × perEvaluator, paid to +// evaluators win-or-lose). Verification-by-default means the eval line is the +// part posters haven't already priced in — never let it surprise them. +func printBountyCostPreview(u *ui.UI, sb *monetizeapi.ServiceBounty, t bounty.TaskType) { + u.Print("──────────────────────────────────────────────────────────────") + u.Print(fmt.Sprintf(" Bounty: %s (%s)", sb.Name, sb.Spec.Task.TypeRef)) + u.Print(fmt.Sprintf(" Reward: %s %s on %s (%s escrow)", + sb.Spec.Reward.Amount, sb.Spec.Reward.Asset.Symbol, sb.Spec.Reward.Network, sb.Spec.Reward.Escrow.Scheme)) + if sb.Spec.Eval.Mode == monetizeapi.EvalModeDangerouslySkipped { + u.Warnf(" Verification: SKIPPED (--dangerously-skip-verification) — poster-as-judge, bounty marked unverified, no reputation feedback") + } else { + per := sb.Spec.Eval.Payment.PerEvaluator + line := fmt.Sprintf(" Verification: %d evaluators × %s %s", sb.Spec.Eval.K, per, sb.Spec.Eval.Payment.Asset) + if perF, err := strconv.ParseFloat(per, 64); err == nil { + line += fmt.Sprintf(" = %.2f %s", float64(sb.Spec.Eval.K)*perF, sb.Spec.Eval.Payment.Asset) + } + u.Print(line + " (poster-funded, paid win-or-lose)") + } + if sb.Spec.Trust.SelfBond.Required { + u.Print(fmt.Sprintf(" Fulfiller bond: %s %s (refundable; forfeited on rejected work)", sb.Spec.Trust.SelfBond.Amount, sb.Spec.Trust.SelfBond.Token)) + } + if sb.Spec.Deadline != nil { + 
u.Print(fmt.Sprintf(" Deadline: %s (auto-refund past it)", sb.Spec.Deadline.UTC().Format(time.RFC3339))) + } + u.Print("──────────────────────────────────────────────────────────────") +} + +// ── lifecycle verbs ───────────────────────────────────────────────────────── +// +// claim/submit/accept/reject write the controller's annotation channel +// (obol.org/claim|commit|submit|verdict); the reconcile loop validates and +// promotes them into controller-owned status. + +func bountyResource() string { return "servicebounties.obol.org" } + +func bountyListCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "list", + Usage: "List ServiceBounties", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace (default: all namespaces)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + bin, kc := kubectl.Paths(cfg) + args := []string{"get", bountyResource()} + if ns := cmd.String("namespace"); ns != "" { + args = append(args, "-n", ns) + } else { + args = append(args, "-A") + } + out, err := kubectl.Output(bin, kc, args...) 
+ if err != nil { + return err + } + fmt.Print(out) + return nil + }, + } +} + +func bountyStatusCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "status", + Usage: "Show a bounty's phase, conditions, claims, and escrow state", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty status ") + } + namespace := cmd.String("namespace") + sb, err := getBountyCLI(cfg, namespace, name) + if err != nil { + return err + } + + fmt.Printf("%s (%s)\n", sb.Name, sb.Spec.Task.TypeRef) + fmt.Printf(" Phase: %s\n", sb.Status.Phase) + fmt.Printf(" Reward: %s %s on %s (escrow: %s)\n", sb.Spec.Reward.Amount, sb.Spec.Reward.Asset.Symbol, sb.Spec.Reward.Network, valueOr(sb.Status.EscrowState, "not reserved")) + if sb.Status.EscrowSpender != "" { + fmt.Printf(" Escrow spender: %s (bind your Permit2 vouchers to this executor)\n", sb.Status.EscrowSpender) + } + if sb.Status.CaptureTxHash != "" { + fmt.Printf(" Payout: %s\n", sb.Status.CaptureTxHash) + } + if sb.Status.RefundTxHash != "" { + fmt.Printf(" Refund: %s\n", sb.Status.RefundTxHash) + } + if sb.Status.ReportURI != "" { + fmt.Printf(" Report: %s\n", sb.Status.ReportURI) + } + for _, claim := range sb.Status.Claims { + fmt.Printf(" Claim: %s phase=%s commit=%s\n", claim.FulfillerAddress, claim.Phase, valueOr(claim.CommitHash, "-")) + } + if sb.Status.BondState != "" { + fmt.Printf(" Bond: %s\n", sb.Status.BondState) + } + if seed := sb.Status.PanelSeed; seed != nil { + fmt.Printf(" Panel seed: source=%s", seed.Source) + if seed.Round > 0 { + fmt.Printf(" round=%d", seed.Round) + } + fmt.Println() + } + if len(sb.Status.Evaluations) > 0 { + fmt.Printf(" Evaluations (quorum k=%d, median>=50 verifies):\n", sb.Spec.Eval.K) + if sb.Status.RevealDeadline 
!= nil { + fmt.Printf(" reveal window closes %s\n", sb.Status.RevealDeadline.UTC().Format(time.RFC3339)) + } + printBountyEvaluations(sb.Status.Evaluations, " ") + if sb.Status.EvalBudgetState != "" { + fmt.Printf(" eval budget: %s", sb.Status.EvalBudgetState) + if sb.Status.EvalPayoutTxHash != "" { + fmt.Printf(" payout=%s", sb.Status.EvalPayoutTxHash) + } + fmt.Println() + } + } + if esc := sb.Status.Escalation; esc != nil { + fmt.Printf(" Escalation (round %d): %s\n", esc.Round, valueOr(esc.Reason, "-")) + fmt.Printf(" budget: %s\n", valueOr(esc.BudgetState, "not reserved")) + if esc.VoucherDeadline != nil { + fmt.Printf(" voucher deadline %s\n", esc.VoucherDeadline.UTC().Format(time.RFC3339)) + } + if esc.RevealDeadline != nil { + fmt.Printf(" reveal window closes %s\n", esc.RevealDeadline.UTC().Format(time.RFC3339)) + } + for _, seat := range esc.Panel { + fmt.Printf(" panel: %s seat=%s\n", seat.Address, seat.Seat) + } + printBountyEvaluations(esc.Evaluations, " ") + } + fmt.Println(" Conditions:") + for _, condition := range sb.Status.Conditions { + fmt.Printf(" %-15s %-5s %-22s %s\n", condition.Type, condition.Status, condition.Reason, condition.Message) + } + printBountyVoucherNextSteps(sb, namespace) + return nil + }, + } +} + +func bountyClaimCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "claim", + Usage: "Claim a bounty as a fulfiller (binds your payout address; optionally sign the self-bond voucher)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "address", Usage: "[REQUIRED] Fulfiller payout address (0x...)", Required: true}, + &cli.StringFlag{Name: "commit", Usage: "Optional commit hash (binds you to a specific deliverable before reveal)"}, + &cli.StringFlag{Name: "bond-key", Usage: "Hex private key to sign the self-bond Permit2 voucher locally (or use --bond-signer-url)"}, + &cli.StringFlag{Name: 
"bond-signer-url", Usage: "Remote-signer base URL to sign the self-bond voucher without exposing a key"}, + &cli.StringFlag{Name: "bond-recipient", Usage: "Bond forfeiture recipient (default: the poster's spec.reward.payTo address)"}, + &cli.StringFlag{Name: "spender", Usage: "Escrow facilitator address to bind as the only executor (default: status.escrowSpender)"}, + &cli.IntFlag{Name: "deadline-hours", Usage: "Bond voucher expiry in hours from now", Value: 72}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty claim --address 0x...") + } + annotations := []string{"obol.org/claim=" + cmd.String("address")} + if commit := cmd.String("commit"); commit != "" { + annotations = append(annotations, "obol.org/commit="+commit) + } + if err := annotateBountyCLI(cfg, cmd.String("namespace"), name, annotations); err != nil { + return err + } + + // Optional self-bond voucher: the FULFILLER's own funds, signed by + // their wallet (never the controller's), forfeited to the poster + // only on rejected work. + bondKey, bondSigner := cmd.String("bond-key"), cmd.String("bond-signer-url") + if bondKey == "" && bondSigner == "" { + return nil + } + return attachBountyBondVoucher(ctx, cfg, cmd, name, bondKey, bondSigner) + }, + } +} + +// attachBountyBondVoucher builds, signs, and ferries the fulfiller's self-bond +// voucher (annotation obol.org/bond-voucher, nonce leg bond). The recipient is +// the poster's payout address (spec.reward.payTo) — the bond is forfeited TO +// the poster on rejected work — overridable / required via --bond-recipient +// when the spec field is absent. 
+func attachBountyBondVoucher(ctx context.Context, cfg *config.Config, cmd *cli.Command, name, bondKey, bondSigner string) error { + namespace := cmd.String("namespace") + sb, err := getBountyCLI(cfg, namespace, name) + if err != nil { + return err + } + bond := sb.Spec.Trust.SelfBond + if strings.TrimSpace(bond.Amount) == "" { + return fmt.Errorf("bounty %s declares no self-bond (spec.trust.selfBond.amount is empty) — nothing to sign", name) + } + recipient := cmd.String("bond-recipient") + if recipient == "" { + recipient = sb.Spec.Reward.PayTo + } + if recipient == "" { + return fmt.Errorf("bounty %s has no poster payout address (spec.reward.payTo) — pass --bond-recipient 0x... explicitly", name) + } + if !common.IsHexAddress(recipient) { + return fmt.Errorf("bond recipient %q is not a 0x address", recipient) + } + + symbol := bond.Token + if symbol == "" { + symbol = sb.Spec.Reward.Asset.Symbol + } + token, err := resolveBountyToken(symbol, sb.Spec.Reward.Network) + if err != nil { + return err + } + amount, err := humanToAtomic(bond.Amount, token.Decimals) + if err != nil { + return fmt.Errorf("bond amount: %w", err) + } + spender, err := resolveBountySpender(cmd.String("spender"), sb.Status.EscrowSpender) + if err != nil { + return err + } + + voucher := escrow.Permit2Voucher{ + Token: token.Address, + Network: sb.Spec.Reward.Network, + Spender: spender, + Nonce: bountyVoucherNonce(string(sb.UID), "bond"), + Deadline: bountyVoucherDeadline(int64(cmd.Int("deadline-hours"))), + Recipients: []escrow.BatchRecipient{ + {Address: common.HexToAddress(recipient).Hex(), Amount: amount}, + }, + } + fmt.Printf("Self-bond: %s %s (%s atomic) -> poster %s on %s (refundable; forfeited only on rejected work)\n", + bond.Amount, symbol, amount, common.HexToAddress(recipient).Hex(), sb.Spec.Reward.Network) + return attachBountyVoucher(ctx, cfg, namespace, name, bountyBondVoucherAnnotation, &voucher, bondKey, bondSigner) +} + +func bountySubmitCommand(cfg *config.Config) 
*cli.Command { + return &cli.Command{ + Name: "submit", + Usage: "Submit a deliverable for a claimed bounty", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "result-hash", Usage: "[REQUIRED] Hash of the deliverable (reveals the commit)", Required: true}, + &cli.StringFlag{Name: "report-uri", Usage: "URI of the A2UI report (local agent hierarchy in v1)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty submit --result-hash 0x...") + } + submission, err := json.Marshal(map[string]string{ + "resultHash": cmd.String("result-hash"), + "reportURI": cmd.String("report-uri"), + }) + if err != nil { + return err + } + return annotateBountyCLI(cfg, cmd.String("namespace"), name, []string{"obol.org/submit=" + string(submission)}) + }, + } +} + +func bountyVerdictCommand(cfg *config.Config, verdict, usage string) *cli.Command { + return &cli.Command{ + Name: verdict, + Usage: usage, + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "reason", Usage: "Reason (recorded in the Verified condition; reject only)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty %s ", verdict) + } + value := verdict + if verdict == "reject" { + value = "reject:" + cmd.String("reason") + } + return annotateBountyCLI(cfg, cmd.String("namespace"), name, []string{"obol.org/verdict=" + value}) + }, + } +} + +func annotateBountyCLI(cfg *config.Config, namespace, name string, annotations []string) error { + bin, kc := kubectl.Paths(cfg) + args := append([]string{"annotate", bountyResource(), name, "-n", namespace, "--overwrite"}, 
annotations...) + out, err := kubectl.Output(bin, kc, args...) + if err != nil { + return err + } + fmt.Print(out) + fmt.Printf("Check status: obol bounty status %s -n %s\n", name, namespace) + return nil +} + +func valueOr(value, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return value +} + +// ── poster-side voucher signing (fund / claim-bond / eval fund) ───────────── +// +// A Permit2 voucher is the poster's (or fulfiller's, for the bond) signed +// authorization the escrow facilitator executes. The CLI signs it locally +// (--key) or via the agent remote-signer (--signer-url) and ferries it to the +// controller on an annotation. The controller NEVER signs — it only attaches +// the voucher to the matching escrow reservation. + +// bountyVoucherNonce derives the Permit2 unordered nonce DETERMINISTICALLY as +// the uint256 of keccak256("|") with leg one of reward, bond, +// eval, eval-r1. Re-running a fund command re-signs the SAME nonce, so +// re-funding is idempotent and a nonce already consumed on-chain can never be +// double-captured. +func bountyVoucherNonce(bountyUID, leg string) string { + return new(big.Int).SetBytes(ethcrypto.Keccak256([]byte(bountyUID + "|" + leg))).String() +} + +// humanToAtomic converts a human-unit decimal amount (e.g. "500.00") to +// atomic token units ("500000000" at 6 decimals) without float rounding. +// Shared with the controller's settle paths via escrow.HumanToAtomic so the +// CLI-signed voucher seats and the controller's capture recipients can never +// drift apart in units. +func humanToAtomic(amount string, decimals int) (string, error) { + return escrow.HumanToAtomic(amount, decimals) +} + +// resolveBountySpender picks the escrow facilitator address the voucher must +// bind as its only executor: the --spender override, else status.escrowSpender +// (ferried from the facilitator's reserve receipt). 
+func resolveBountySpender(override, statusSpender string) (string, error) { + if override != "" { + if !common.IsHexAddress(override) { + return "", fmt.Errorf("--spender %q is not a 0x address", override) + } + return common.HexToAddress(override).Hex(), nil + } + if strings.TrimSpace(statusSpender) == "" { + return "", fmt.Errorf("status.escrowSpender is not set yet and no --spender was given — the escrow facilitator reports its address on the first reserve receipt; wait for the controller to reconcile (obol bounty status) or pass --spender 0x... explicitly") + } + if !common.IsHexAddress(statusSpender) { + return "", fmt.Errorf("status.escrowSpender %q is not a 0x address — pass --spender explicitly", statusSpender) + } + return common.HexToAddress(statusSpender).Hex(), nil +} + +// resolveBountyToken looks the payment token up in the x402 registry and +// returns its contract address + decimals for the given network. +func resolveBountyToken(symbol, network string) (x402verifier.TokenEntry, error) { + entry, ok := x402verifier.ResolveToken(symbol, network) + if !ok { + return x402verifier.TokenEntry{}, fmt.Errorf("token %q is not registered on network %q (supported: %s)", + symbol, network, strings.Join(x402verifier.SupportedTokens(), ", ")) + } + return entry, nil +} + +// signBountyVoucher signs the voucher with the local hex key or the remote +// signer, then verifies the result against the spender binding. Exactly the +// poster's wallet authorizes funds — the controller never signs. 
+func signBountyVoucher(ctx context.Context, v *escrow.Permit2Voucher, keyHex, signerURL string) error { + chainID, err := escrow.ChainIDForNetwork(v.Network) + if err != nil { + return err + } + switch { + case keyHex != "": + key, err := ethcrypto.HexToECDSA(strings.TrimPrefix(strings.TrimPrefix(keyHex, "0x"), "0X")) + if err != nil { + return fmt.Errorf("parse signing key: %w", err) + } + if err := escrow.SignVoucher(v, chainID, key); err != nil { + return err + } + case signerURL != "": + signer := erc8004.NewRemoteSigner(signerURL) + addr, err := signer.GetAddress(ctx) + if err != nil { + return err + } + v.Owner = addr.Hex() + _, remote, err := escrow.VoucherTypedData(*v, chainID) + if err != nil { + return err + } + sig, err := signer.SignTypedData(ctx, addr, remote) + if err != nil { + return err + } + v.Signature = sig + default: + return fmt.Errorf("no signer: pass --key or --signer-url — the controller NEVER signs; only your wallet can authorize funds") + } + return escrow.VerifyVoucher(*v, chainID, common.HexToAddress(v.Spender)) +} + +// attachBountyVoucher signs the voucher and ferries it to the controller on +// the given annotation. +func attachBountyVoucher(ctx context.Context, cfg *config.Config, namespace, name, annotation string, v *escrow.Permit2Voucher, keyHex, signerURL string) error { + if err := signBountyVoucher(ctx, v, keyHex, signerURL); err != nil { + return err + } + raw, err := json.Marshal(v) + if err != nil { + return err + } + fmt.Printf("Voucher signed by %s (spender %s, nonce %s, deadline %s)\n", + v.Owner, v.Spender, v.Nonce, time.Unix(v.Deadline, 0).UTC().Format(time.RFC3339)) + fmt.Println("Nonce is deterministic per (bounty, leg): re-running re-signs the same nonce, so re-funding is idempotent and a consumed nonce can never be double-captured.") + return annotateBountyCLI(cfg, namespace, name, []string{annotation + "=" + string(raw)}) +} + +// getBountyCLI fetches and decodes one ServiceBounty. 
+func getBountyCLI(cfg *config.Config, namespace, name string) (*monetizeapi.ServiceBounty, error) { + bin, kc := kubectl.Paths(cfg) + out, err := kubectl.Output(bin, kc, "get", bountyResource(), name, "-n", namespace, "-o", "json") + if err != nil { + return nil, err + } + var sb monetizeapi.ServiceBounty + if err := json.Unmarshal([]byte(out), &sb); err != nil { + return nil, fmt.Errorf("decode bounty: %w", err) + } + return &sb, nil +} + +// bountyVoucherDeadline turns --deadline-hours into a unix voucher expiry. +func bountyVoucherDeadline(hours int64) int64 { + return time.Now().Add(time.Duration(hours) * time.Hour).Unix() +} + +// bountyFundCommand signs + attaches the poster's Permit2 reward voucher: +// one recipient seat binding the claimed fulfiller to the full reward amount. +func bountyFundCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "fund", + Usage: "Sign + attach the reward escrow voucher (your wallet signs; the controller NEVER does)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "key", Usage: "Hex private key to sign the Permit2 voucher locally (or use --signer-url)"}, + &cli.StringFlag{Name: "signer-url", Usage: "Remote-signer base URL (e.g. 
http://127.0.0.1:9000) to sign without exposing a key"}, + &cli.StringFlag{Name: "spender", Usage: "Escrow facilitator address to bind as the only executor (default: status.escrowSpender)"}, + &cli.IntFlag{Name: "deadline-hours", Usage: "Voucher expiry in hours from now (the hard on-chain guarantee)", Value: 72}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty fund (--key | --signer-url )") + } + namespace := cmd.String("namespace") + sb, err := getBountyCLI(cfg, namespace, name) + if err != nil { + return err + } + if len(sb.Status.Claims) == 0 || sb.Status.Claims[0].FulfillerAddress == "" { + return fmt.Errorf("bounty %s has no claim yet — the reward voucher binds the fulfiller's payout seat, so fund AFTER `obol bounty claim`", name) + } + fulfiller := sb.Status.Claims[0].FulfillerAddress + + token, err := resolveBountyToken(sb.Spec.Reward.Asset.Symbol, sb.Spec.Reward.Network) + if err != nil { + return err + } + amount, err := humanToAtomic(sb.Spec.Reward.Amount, token.Decimals) + if err != nil { + return fmt.Errorf("reward amount: %w", err) + } + spender, err := resolveBountySpender(cmd.String("spender"), sb.Status.EscrowSpender) + if err != nil { + return err + } + + voucher := escrow.Permit2Voucher{ + Token: token.Address, + Network: sb.Spec.Reward.Network, + Spender: spender, + Nonce: bountyVoucherNonce(string(sb.UID), "reward"), + Deadline: bountyVoucherDeadline(int64(cmd.Int("deadline-hours"))), + Recipients: []escrow.BatchRecipient{ + {Address: fulfiller, Amount: amount}, + }, + } + fmt.Printf("Funding reward: %s %s (%s atomic) -> fulfiller %s on %s\n", + sb.Spec.Reward.Amount, sb.Spec.Reward.Asset.Symbol, amount, fulfiller, sb.Spec.Reward.Network) + return attachBountyVoucher(ctx, cfg, namespace, name, bountyRewardVoucherAnnotation, + &voucher, cmd.String("key"), cmd.String("signer-url")) + }, + } +} + +// bountyEvalFundRecipients 
mirrors the controller's evalBudgetTotal math for +// round 0 (counting seats: full price, probation at half price, shadows free) +// and reserveEscalationBudget for round 1 (every seat full price). +func bountyEvalFundRecipients(panel []monetizeapi.ServiceBountyPanelSeat, perAtomic *big.Int, escalation bool) []escrow.BatchRecipient { + half := new(big.Int).Div(perAtomic, big.NewInt(2)) + var recipients []escrow.BatchRecipient + for _, seat := range panel { + if !escalation && seat.Seat == monetizeapi.PanelSeatShadow { + continue // shadows evaluate free — never a paid voucher seat + } + amount := perAtomic + if !escalation && seat.Seat == monetizeapi.PanelSeatProbation { + amount = half // newcomer discount passed to the poster + } + recipients = append(recipients, escrow.BatchRecipient{Address: seat.Address, Amount: amount.String()}) + } + return recipients +} + +// bountyEvalFundCommand signs + attaches the poster's eval-budget voucher: +// one seat per counting panel evaluator. When the escalation budget is +// AwaitingVoucher it targets the round-1 panel instead (full price, voucher +// annotation obol.org/eval-voucher-r1, nonce leg eval-r1). 
+func bountyEvalFundCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "fund", + Usage: "Sign + attach the poster-funded eval-budget voucher (evaluators are paid win-or-lose; the controller NEVER signs)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.StringFlag{Name: "key", Usage: "Hex private key to sign the Permit2 voucher locally (or use --signer-url)"}, + &cli.StringFlag{Name: "signer-url", Usage: "Remote-signer base URL to sign without exposing a key"}, + &cli.StringFlag{Name: "spender", Usage: "Escrow facilitator address to bind as the only executor (default: status.escrowSpender)"}, + &cli.IntFlag{Name: "deadline-hours", Usage: "Voucher expiry in hours from now", Value: 72}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty eval fund (--key | --signer-url )") + } + namespace := cmd.String("namespace") + sb, err := getBountyCLI(cfg, namespace, name) + if err != nil { + return err + } + per := strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator) + if per == "" { + return fmt.Errorf("bounty %s has no eval payment leg (spec.eval.payment.perEvaluator is empty) — nothing to fund", name) + } + token, err := resolveBountyToken(sb.Spec.Eval.Payment.Asset, sb.Spec.Reward.Network) + if err != nil { + return err + } + perAtomicStr, err := humanToAtomic(per, token.Decimals) + if err != nil { + return fmt.Errorf("perEvaluator amount: %w", err) + } + perAtomic, _ := new(big.Int).SetString(perAtomicStr, 10) + + // Escalation targeting: a round-1 panel waiting on its budget wins. 
+ leg, annotation := "eval", bountyEvalVoucherAnnotation + panel := sb.Status.EvaluatorPanel + escalation := false + if esc := sb.Status.Escalation; esc != nil && esc.BudgetState == escrow.StateAwaitingVoucher { + leg, annotation = "eval-r1", bountyEvalVoucherR1Annotation + panel = esc.Panel + escalation = true + } + if len(panel) == 0 { + return fmt.Errorf("bounty %s has no evaluator panel selected yet — wait for the controller to draw the panel (obol bounty status)", name) + } + recipients := bountyEvalFundRecipients(panel, perAtomic, escalation) + if len(recipients) == 0 { + return fmt.Errorf("bounty %s panel has no counting seats to fund", name) + } + spender, err := resolveBountySpender(cmd.String("spender"), sb.Status.EscrowSpender) + if err != nil { + return err + } + + voucher := escrow.Permit2Voucher{ + Token: token.Address, + Network: sb.Spec.Reward.Network, + Spender: spender, + Nonce: bountyVoucherNonce(string(sb.UID), leg), + Deadline: bountyVoucherDeadline(int64(cmd.Int("deadline-hours"))), + Recipients: recipients, + } + round := "round-0 quorum" + if escalation { + round = fmt.Sprintf("escalation round %d", sb.Status.Escalation.Round) + } + fmt.Printf("Funding eval budget (%s): %d seat(s) x %s %s on %s (probation seats at half price)\n", + round, len(recipients), per, sb.Spec.Eval.Payment.Asset, sb.Spec.Reward.Network) + return attachBountyVoucher(ctx, cfg, namespace, name, annotation, + &voucher, cmd.String("key"), cmd.String("signer-url")) + }, + } +} + +// bountyFeedbackCommand prints ERC-8004 giveFeedback calldata for the poster +// to score the fulfiller from the settled verdict — submitted with the +// poster's OWN wallet, exactly like `obol bounty eval calldata`. 
+func bountyFeedbackCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "feedback", + Usage: "Print ERC-8004 giveFeedback calldata for the fulfiller, scored from the verdict (the controller NEVER signs)", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "namespace", Aliases: []string{"n"}, Usage: "Namespace", Value: "hermes-obol-agent"}, + &cli.Int64Flag{Name: "agent-id", Usage: "[REQUIRED] The fulfiller's ERC-8004 agent id (Identity Registry tokenId)", Required: true}, + &cli.StringFlag{Name: "feedback-uri", Usage: "Optional URI of the bounty report backing the feedback"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + name := cmd.Args().First() + if name == "" { + return fmt.Errorf("missing bounty name: obol bounty feedback --agent-id N") + } + sb, err := getBountyCLI(cfg, cmd.String("namespace"), name) + if err != nil { + return err + } + verdictSpoken := false + for _, condition := range sb.Status.Conditions { + if condition.Type == "Verified" { + verdictSpoken = true + break + } + } + if !verdictSpoken { + return fmt.Errorf("bounty %s has no Verified verdict yet — feedback scores the settled verdict (status.weightedScore)", name) + } + score := sb.Status.WeightedScore + if score < 0 || score > 100 { + return fmt.Errorf("status.weightedScore %d out of range 0-100", score) + } + + network := sb.Spec.Reward.Network + registry, err := erc8004.ReputationRegistryAddress(network) + if err != nil { + return err + } + calldata, err := erc8004.EncodeGiveFeedback( + big.NewInt(cmd.Int64("agent-id")), + big.NewInt(score), + 0, // score is already 0-100, no fixed-point scaling + sb.Spec.Task.TypeRef, + "obol-bounty", + "", + cmd.String("feedback-uri"), + common.Hash{}, + ) + if err != nil { + return err + } + fmt.Printf("Feedback: poster -> fulfiller %s, score %d/100 (from the %s verdict)\n", + valueOr(firstClaimAddress(sb), ""), score, valueOr(conditionReasonCLI(sb.Status.Conditions, "Verified"), "Verified")) + 
fmt.Printf("ReputationRegistry (%s): %s\n", network, registry) + fmt.Printf("Calldata: 0x%x\n", calldata) + fmt.Println("Submit with YOUR wallet (e.g. the agent remote-signer or cast send) — then pass the tx hash to `obol bounty eval reveal --validation-tx`.") + return nil + }, + } +} + +// printBountyEvaluations renders one round's evaluation lines with the +// grounded marker: [grounded] means the reveal is backed by an on-chain +// ERC-8004 validation entry for this bounty's eval-request hash. +func printBountyEvaluations(evaluations []monetizeapi.ServiceBountyEvaluation, indent string) { + for _, ev := range evaluations { + score := "-" + if ev.Phase == "Revealed" { + score = fmt.Sprintf("%d", ev.Score) + } + grounded := "" + if ev.Grounded { + grounded = " [grounded]" + } + fmt.Printf("%s%s seat=%-9s phase=%-10s score=%-4s withinBand=%-5v paid=%v%s\n", + indent, ev.Address, valueOr(ev.Seat, "open"), ev.Phase, score, ev.WithinBand, ev.Paid, grounded) + } +} + +// printBountyVoucherNextSteps prints the exact fund command for every escrow +// leg parked in AwaitingVoucher — the facilitator verified the reservation and +// is waiting for a signed Permit2 voucher to ferry in. 
+func printBountyVoucherNextSteps(sb *monetizeapi.ServiceBounty, namespace string) { + awaiting := escrow.StateAwaitingVoucher + if sb.Status.EscrowState == awaiting { + fmt.Printf(" Next: reward escrow is awaiting its voucher — run:\n") + fmt.Printf(" obol bounty fund %s -n %s (--key | --signer-url )\n", sb.Name, namespace) + } + if sb.Status.EvalBudgetState == awaiting { + fmt.Printf(" Next: eval budget is awaiting its voucher — run:\n") + fmt.Printf(" obol bounty eval fund %s -n %s (--key | --signer-url )\n", sb.Name, namespace) + } + if esc := sb.Status.Escalation; esc != nil && esc.BudgetState == awaiting { + fmt.Printf(" Next: escalation eval budget is awaiting its voucher — run:\n") + fmt.Printf(" obol bounty eval fund %s -n %s (--key | --signer-url ) # auto-targets the escalation panel\n", sb.Name, namespace) + } + if sb.Status.BondState == awaiting { + fmt.Printf(" Next: self-bond is awaiting its voucher — re-run claim with bond signing:\n") + fmt.Printf(" obol bounty claim %s -n %s --address <0x...> (--bond-key | --bond-signer-url )\n", sb.Name, namespace) + } +} + +func firstClaimAddress(sb *monetizeapi.ServiceBounty) string { + if len(sb.Status.Claims) == 0 { + return "" + } + return sb.Status.Claims[0].FulfillerAddress +} + +func conditionReasonCLI(conditions []monetizeapi.Condition, condType string) string { + for _, condition := range conditions { + if condition.Type == condType { + return condition.Reason + } + } + return "" +} diff --git a/cmd/obol/bounty_test.go b/cmd/obol/bounty_test.go new file mode 100644 index 00000000..191bb1d2 --- /dev/null +++ b/cmd/obol/bounty_test.go @@ -0,0 +1,315 @@ +package main + +import ( + "context" + "math/big" + "strings" + "testing" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + ethcrypto "github.com/ethereum/go-ethereum/crypto" + 
"github.com/urfave/cli/v3" +) + +// ───────────────────────────────────────────────────────────────────────────── +// Command structure (house style: sell_test.go) +// ───────────────────────────────────────────────────────────────────────────── + +func testBountyCommand(t *testing.T) *cli.Command { + t.Helper() + return bountyCommand(&config.Config{}) +} + +func TestBountyFundCommand_Flags(t *testing.T) { + fund := findSubcommand(t, testBountyCommand(t), "fund") + flags := flagMap(fund) + + requireFlags(t, flags, "namespace", "key", "signer-url", "spender", "deadline-hours") + assertStringDefault(t, flags, "namespace", "hermes-obol-agent") + assertIntDefault(t, flags, "deadline-hours", 72) +} + +func TestBountyClaimCommand_BondVoucherFlags(t *testing.T) { + claim := findSubcommand(t, testBountyCommand(t), "claim") + flags := flagMap(claim) + + requireFlags(t, flags, "address", "bond-key", "bond-signer-url", "bond-recipient", "spender", "deadline-hours") + assertFlagRequired(t, flags, "address") + assertIntDefault(t, flags, "deadline-hours", 72) +} + +func TestBountyEvalFundCommand_Flags(t *testing.T) { + eval := findSubcommand(t, testBountyCommand(t), "eval") + fund := findSubcommand(t, eval, "fund") + flags := flagMap(fund) + + requireFlags(t, flags, "namespace", "key", "signer-url", "spender", "deadline-hours") + assertStringDefault(t, flags, "namespace", "hermes-obol-agent") + assertIntDefault(t, flags, "deadline-hours", 72) +} + +func TestBountyEvalCalldata_DerivationFlags(t *testing.T) { + eval := findSubcommand(t, testBountyCommand(t), "eval") + calldata := findSubcommand(t, eval, "calldata") + flags := flagMap(calldata) + + requireFlags(t, flags, "bounty", "address", "request-hash", "response", "network", "namespace") + assertFlagRequired(t, flags, "response") + + // --request-hash became an explicit OVERRIDE: it must no longer be + // required, since --bounty + --address derive it from the bounty UID. 
+ if f, ok := flags["request-hash"].(*cli.StringFlag); !ok || f.Required { + t.Errorf("--request-hash must be an optional override (derive via --bounty/--address), got required=%v", ok && f.Required) + } +} + +func TestBountyFeedbackCommand_Flags(t *testing.T) { + feedback := findSubcommand(t, testBountyCommand(t), "feedback") + flags := flagMap(feedback) + + requireFlags(t, flags, "namespace", "agent-id", "feedback-uri") + assertStringDefault(t, flags, "namespace", "hermes-obol-agent") + + // --agent-id is an Int64Flag (ERC-8004 tokenIds exceed int32), which the + // shared assertFlagRequired helper doesn't cover — assert inline. + f, ok := flags["agent-id"].(*cli.Int64Flag) + if !ok { + t.Fatalf("flag --agent-id is %T, want *cli.Int64Flag", flags["agent-id"]) + } + if !f.Required { + t.Error("flag --agent-id should be required") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Deterministic voucher nonce +// ───────────────────────────────────────────────────────────────────────────── + +func TestBountyVoucherNonce_Deterministic(t *testing.T) { + uid := "f81d4fae-7dec-11d0-a765-00a0c91e6bf6" + + // Re-running a fund command must re-derive the SAME nonce, so re-funding + // is idempotent and a consumed Permit2 nonce can never be double-captured. + if a, b := bountyVoucherNonce(uid, "reward"), bountyVoucherNonce(uid, "reward"); a != b { + t.Errorf("nonce not deterministic: %s != %s", a, b) + } + + // Cross-check the exact derivation: uint256 of keccak256("|reward"). + want := new(big.Int).SetBytes(ethcrypto.Keccak256([]byte(uid + "|reward"))).String() + if got := bountyVoucherNonce(uid, "reward"); got != want { + t.Errorf("nonce derivation drifted: got %s, want %s", got, want) + } + + // Every leg gets its own nonce — reward, bond, eval, and eval-r1 vouchers + // for the same bounty must never collide. 
+ seen := map[string]string{} + for _, leg := range []string{"reward", "bond", "eval", "eval-r1"} { + nonce := bountyVoucherNonce(uid, leg) + if prev, dup := seen[nonce]; dup { + t.Errorf("legs %s and %s derived the same nonce %s", prev, leg, nonce) + } + seen[nonce] = leg + + // Permit2 nonces are uint256 decimal strings. + v, ok := new(big.Int).SetString(nonce, 10) + if !ok || v.Sign() < 0 || v.BitLen() > 256 { + t.Errorf("leg %s nonce %q is not a decimal uint256", leg, nonce) + } + } + + // Distinct bounties must derive distinct nonces for the same leg. + if bountyVoucherNonce(uid, "reward") == bountyVoucherNonce("other-uid", "reward") { + t.Error("distinct bounty UIDs derived the same reward nonce") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Human → atomic amount conversion +// ───────────────────────────────────────────────────────────────────────────── + +func TestHumanToAtomic(t *testing.T) { + cases := []struct { + amount string + decimals int + want string + wantErr bool + }{ + {"500.00", 6, "500000000", false}, + {"0.5", 18, "500000000000000000", false}, + {"1", 6, "1000000", false}, + {"0.000001", 6, "1", false}, + {"1.230000", 2, "123", false}, // trailing zeros beyond precision OK + {".5", 6, "500000", false}, + {"0.0000001", 6, "", true}, // sub-atomic remainder + {"0", 6, "", true}, // must be positive + {"-1", 6, "", true}, + {"abc", 6, "", true}, + {"", 6, "", true}, + } + for _, tc := range cases { + got, err := humanToAtomic(tc.amount, tc.decimals) + if tc.wantErr { + if err == nil { + t.Errorf("humanToAtomic(%q, %d) = %q, want error", tc.amount, tc.decimals, got) + } + continue + } + if err != nil { + t.Errorf("humanToAtomic(%q, %d): %v", tc.amount, tc.decimals, err) + continue + } + if got != tc.want { + t.Errorf("humanToAtomic(%q, %d) = %q, want %q", tc.amount, tc.decimals, got, tc.want) + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Eval 
voucher seats mirror the controller's budget math +// ───────────────────────────────────────────────────────────────────────────── + +func TestBountyEvalFundRecipients_MirrorsControllerMath(t *testing.T) { + full := "0x1111111111111111111111111111111111111111" + probation := "0x2222222222222222222222222222222222222222" + shadow := "0x3333333333333333333333333333333333333333" + panel := []monetizeapi.ServiceBountyPanelSeat{ + {Address: full, Seat: monetizeapi.PanelSeatFull}, + {Address: probation, Seat: monetizeapi.PanelSeatProbation}, + {Address: shadow, Seat: monetizeapi.PanelSeatShadow}, + } + per := big.NewInt(1_000_000) + + // Round 0: full seat at full price, probation at half, shadow free — + // exactly the controller's evalBudgetTotal / settleEvalBudget math. + recipients := bountyEvalFundRecipients(panel, per, false) + if len(recipients) != 2 { + t.Fatalf("round-0 recipients = %d, want 2 (shadow evaluates free)", len(recipients)) + } + if recipients[0].Address != full || recipients[0].Amount != "1000000" { + t.Errorf("full seat = %+v, want %s at 1000000", recipients[0], full) + } + if recipients[1].Address != probation || recipients[1].Amount != "500000" { + t.Errorf("probation seat = %+v, want %s at 500000 (half price)", recipients[1], probation) + } + + // Voucher total must equal the controller's reserve: k×per − per/2 for + // one sitting probation seat (k=2 counting seats here). + total := new(big.Int) + for _, r := range recipients { + amount, ok := new(big.Int).SetString(r.Amount, 10) + if !ok { + t.Fatalf("recipient amount %q is not a decimal uint256", r.Amount) + } + total.Add(total, amount) + } + wantTotal := big.NewInt(2*1_000_000 - 1_000_000/2) + if total.Cmp(wantTotal) != 0 { + t.Errorf("voucher total = %s, want %s (k×per − per/2)", total, wantTotal) + } + + // Escalation round: every seat full price, no discount, no free seats — + // mirrors reserveEscalationBudget. 
+ escPanel := []monetizeapi.ServiceBountyPanelSeat{ + {Address: full, Seat: monetizeapi.PanelSeatFull}, + {Address: probation, Seat: monetizeapi.PanelSeatFull}, + } + escRecipients := bountyEvalFundRecipients(escPanel, per, true) + if len(escRecipients) != 2 { + t.Fatalf("escalation recipients = %d, want 2", len(escRecipients)) + } + for _, r := range escRecipients { + if r.Amount != "1000000" { + t.Errorf("escalation seat %s = %s, want full price 1000000", r.Address, r.Amount) + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Voucher signing +// ───────────────────────────────────────────────────────────────────────────── + +func TestSignBountyVoucher_LocalKeyRoundTrip(t *testing.T) { + // anvil key 0 — test-only, never funded outside local forks. + key, err := ethcrypto.HexToECDSA("ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80") + if err != nil { + t.Fatalf("parse key: %v", err) + } + spender := common.HexToAddress("0x4444444444444444444444444444444444444444") + + voucher := escrow.Permit2Voucher{ + Token: "0x0a09371a8b011d5110656ceBCc70603e53FD2c78", + Network: "base-sepolia", + Spender: spender.Hex(), + Nonce: bountyVoucherNonce("uid-1", "reward"), + Deadline: time.Now().Add(time.Hour).Unix(), + Recipients: []escrow.BatchRecipient{ + {Address: "0x5555555555555555555555555555555555555555", Amount: "1000000"}, + }, + } + + // signBountyVoucher signs AND verifies against the spender binding. 
+ if err := signBountyVoucher(context.Background(), &voucher, "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80", ""); err != nil { + t.Fatalf("signBountyVoucher: %v", err) + } + if want := ethcrypto.PubkeyToAddress(key.PublicKey).Hex(); voucher.Owner != want { + t.Errorf("voucher owner = %s, want signing key address %s", voucher.Owner, want) + } + chainID, err := escrow.ChainIDForNetwork(voucher.Network) + if err != nil { + t.Fatalf("ChainIDForNetwork: %v", err) + } + if err := escrow.VerifyVoucher(voucher, chainID, spender); err != nil { + t.Errorf("signed voucher does not verify: %v", err) + } +} + +func TestSignBountyVoucher_RequiresASigner(t *testing.T) { + voucher := escrow.Permit2Voucher{ + Token: "0x0a09371a8b011d5110656ceBCc70603e53FD2c78", + Network: "base-sepolia", + Spender: "0x4444444444444444444444444444444444444444", + Nonce: "1", + Deadline: time.Now().Add(time.Hour).Unix(), + Recipients: []escrow.BatchRecipient{ + {Address: "0x5555555555555555555555555555555555555555", Amount: "1"}, + }, + } + err := signBountyVoucher(context.Background(), &voucher, "", "") + if err == nil { + t.Fatal("expected error with neither --key nor --signer-url") + } + if !strings.Contains(err.Error(), "controller NEVER signs") { + t.Errorf("error %q must carry the controller-never-signs messaging", err) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Spender resolution +// ───────────────────────────────────────────────────────────────────────────── + +func TestResolveBountySpender(t *testing.T) { + statusSpender := "0x4444444444444444444444444444444444444444" + + got, err := resolveBountySpender("", statusSpender) + if err != nil || got != common.HexToAddress(statusSpender).Hex() { + t.Errorf("status spender path = (%q, %v), want canonical %s", got, err, statusSpender) + } + + override := "0x5555555555555555555555555555555555555555" + got, err = resolveBountySpender(override, statusSpender) + if err != nil 
|| got != common.HexToAddress(override).Hex() { + t.Errorf("override path = (%q, %v), want %s", got, err, override) + } + + if _, err := resolveBountySpender("", ""); err == nil { + t.Error("expected a helpful error when neither --spender nor status.escrowSpender is set") + } + + if _, err := resolveBountySpender("not-an-address", statusSpender); err == nil { + t.Error("expected error for malformed --spender") + } +} diff --git a/cmd/obol/buy.go b/cmd/obol/buy.go index fb19d923..6f055b55 100644 --- a/cmd/obol/buy.go +++ b/cmd/obol/buy.go @@ -59,6 +59,7 @@ func buyCommand(cfg *config.Config) *cli.Command { Usage: "Buy access to remote services via x402 micropayments", Commands: []*cli.Command{ buyInferenceCommand(cfg), + buyDatasetCommand(cfg), }, } } @@ -66,28 +67,45 @@ func buyCommand(cfg *config.Config) *cli.Command { func buyInferenceCommand(cfg *config.Config) *cli.Command { return &cli.Command{ Name: "inference", - Usage: "Buy paid inference from an x402-gated seller via the obol-agent", - ArgsUsage: "[]", - Description: `Pre-authorizes an x402-gated inference seller through an obol-agent's wallet. + Usage: "Buy inference for your agents — a hosted BYOK provider (Venice, OpenRouter, …) or an x402-gated seller", + ArgsUsage: "[|]", + Description: `Two ways to give your agents inference: -Hand the command a seller URL — either a storefront base -("https://inference.v1337.org") or a specific offer -("https://inference.v1337.org/services/aeon") — and the CLI will walk -/api/services.json, pick the inference offer, and pre-sign authorizations -via the agent's remote signer. + 1. Hosted provider (BYOK) — hand the command a provider id and it opens + that provider's API-key page in your browser, takes the key, and wires + your agents' LiteLLM gateway to it: -With no URL, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used. 
+ obol buy inference venice + obol buy inference openrouter --free -In a TTY, the CLI prompts for auto-refill, request count, and -confirmation. Pass --yes / -y for non-interactive runs (CI, scripts) — ---count is required in that mode. + Built-in providers: venice, openrouter, nvidia, gmi, novita, + huggingface (plus anthropic, openai). The key is read from the + provider's env var when already set, so this stays non-interactive in CI. + + 2. x402-gated seller — hand it a seller URL (a storefront base like + "https://inference.v1337.org" or a specific offer ".../services/aeon") + and the CLI walks /api/services.json, picks the inference offer, and + pre-signs payment authorizations via the agent's remote signer. With no + argument, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used. + +In a TTY the seller flow prompts for auto-refill, request count, and +confirmation. Pass --yes / -y for non-interactive runs (--count required). Examples: - obol buy inference + obol buy inference venice + obol buy inference openrouter --free obol buy inference https://inference.v1337.org/services/aeon - obol buy inference https://seller.example/services/foo --yes --count 100 - obol buy inference https://seller.example/services/foo --auto-refill --refill-threshold 5 --refill-count 25`, + obol buy inference https://seller.example/services/foo --yes --count 100`, Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "api-key", + Usage: "API key for a hosted provider (BYOK). Also read from the provider's env var when set.", + Sources: cli.EnvVars("LLM_API_KEY"), + }, + &cli.BoolFlag{ + Name: "free", + Usage: "For a hosted provider that has them, seed only the curated free-tier models (OpenRouter)", + }, &cli.StringFlag{ Name: "seller", Usage: "Seller URL (alternative to positional). 
When neither is set the default storefront is used.", @@ -160,6 +178,43 @@ Examples: } } +// runBuyInferenceProvider is the BYOK front door: open the provider's +// API-key page (hermes-style openurl), take the key (--api-key → env → +// prompt), then wire the LiteLLM gateway via the shared model-setup +// engine. No wallet, no x402 — this is hosted inference with the user's +// own key, the easiest way to get an agent talking to a model. +func runBuyInferenceProvider(cfg *config.Config, cmd *cli.Command, prof model.ProviderInfo) error { + u := getUI(cmd) + u.Infof("Connecting %s for your agents (bring-your-own-key)", prof.Name) + + apiKey := strings.TrimSpace(cmd.String("api-key")) + if apiKey == "" { + if key, envVar := model.ResolveAPIKey(prof.ID); key != "" { + apiKey = key + u.Infof("Using %s API key from %s", prof.Name, envVar) + } + } + + // openurl: send the operator to the provider's key page before we + // prompt for the key (skipped when a key is already in hand or non-TTY). + if apiKey == "" && prof.SignupURL != "" && u.IsTTY() && !u.IsJSON() { + u.Infof("Opening %s to create an API key …", prof.SignupURL) + if err := openBrowser(prof.SignupURL); err != nil { + u.Dim(fmt.Sprintf("(couldn't open a browser — visit %s)", prof.SignupURL)) + } + } + + var models []string + if m := strings.TrimSpace(cmd.String("model")); m != "" { + models = []string{m} + } + + // Shared engine: prompts for the key if still empty, seeds --free, + // resolves a model (registry default or live /v1/models), patches + // LiteLLM, and promotes + syncs the agents to use it. + return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free")) +} + // runBuyInference is the orchestrator for the new flow. 
Kept separate from // the cli.Command literal so the steps stay scannable: resolve agent → // resolve seller URL → pick catalog entry → resolve token+count+budget → @@ -167,6 +222,18 @@ Examples: func runBuyInference(ctx context.Context, cfg *config.Config, cmd *cli.Command) error { u := getUI(cmd) + // Front door: if the argument names a hosted provider in the registry + // (venice, openrouter, …) rather than a seller URL, run BYOK onboarding + // — open the provider's key page and wire the LiteLLM gateway. Ollama is + // local and free, so it's not a "buy" target. + arg := strings.TrimSpace(cmd.String("seller")) + if arg == "" { + arg = strings.TrimSpace(cmd.Args().First()) + } + if prof, ok := model.ProviderByID(arg); ok && prof.ID != model.ProviderOllama { + return runBuyInferenceProvider(cfg, cmd, prof) + } + u.Info("Purchasing remote inference for running Obol Agents") target, err := resolveBuyAgent(cfg, cmd) diff --git a/cmd/obol/buy_test.go b/cmd/obol/buy_test.go index 5b8bce16..a59f117e 100644 --- a/cmd/obol/buy_test.go +++ b/cmd/obol/buy_test.go @@ -8,6 +8,8 @@ import ( "github.com/ObolNetwork/obol-stack/internal/agentruntime" "github.com/ObolNetwork/obol-stack/internal/buy" + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/model" "github.com/ObolNetwork/obol-stack/internal/schemas" ) @@ -600,3 +602,36 @@ func TestLooksLikeURL(t *testing.T) { } } } + +// TestBuyInference_BYOKFrontDoor pins the BYOK onboarding surface on +// `obol buy inference`: the command exposes --api-key/--free/--model, and +// every registry provider that isn't local Ollama is recognized as a +// hosted-provider argument (the dispatch the Action keys on). 
+func TestBuyInference_BYOKFrontDoor(t *testing.T) { + cmd := buyInferenceCommand(&config.Config{}) + + want := map[string]bool{"api-key": false, "free": false, "model": false, "seller": false} + for _, f := range cmd.Flags { + for _, n := range f.Names() { + if _, ok := want[n]; ok { + want[n] = true + } + } + } + for n, found := range want { + if !found { + t.Errorf("buy inference missing --%s flag", n) + } + } + + // Hosted providers route to BYOK onboarding; ollama does not (local). + for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} { + p, ok := model.ProviderByID(id) + if !ok || p.ID == model.ProviderOllama { + t.Errorf("provider %q should be a BYOK buy-inference target", id) + } + } + if p, ok := model.ProviderByID("ollama"); !ok || p.ID != model.ProviderOllama { + t.Errorf("ollama must remain a local (non-buy) provider") + } +} diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go new file mode 100644 index 00000000..a0744d78 --- /dev/null +++ b/cmd/obol/dataset.go @@ -0,0 +1,495 @@ +package main + +// obol dataset — owner side of a versioned, membership-gated dataset offer. +// +// obol dataset from --name ingest a bundle as a new +// signed version (creates v1). +// obol dataset version --bundle append the next signed version. +// obol dataset publish host the artifact server on +// this machine + a Cloudflare +// tunnel; gate every byte. +// obol dataset approve admit a worker (membership). +// obol dataset verify walk the signed version chain. +// obol dataset status versions + members. +// +// The artifact server is the host gateway (same spirit as `obol sell +// inference` / `obol research publish`): it runs on the owner's machine, never +// in the cluster, and reaches remote buyers over the real internet via +// Cloudflare. Bytes never leave the host un-gated. 
+ +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/dataset" + "github.com/urfave/cli/v3" +) + +// datasetState lets approve/status reach a running publish server. +type datasetState struct { + ID string `json:"id"` + LocalAddr string `json:"local_addr"` + PublicURL string `json:"public_url"` + OwnerToken string `json:"owner_token"` +} + +func datasetCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "dataset", + Usage: "Publish and sell versioned, membership-gated datasets", + Commands: []*cli.Command{ + datasetFromCommand(cfg), + datasetVersionCommand(cfg), + datasetPublishCommand(cfg), + datasetApproveCommand(cfg), + datasetVerifyCommand(cfg), + datasetStatusCommand(cfg), + }, + } +} + +func datasetFromCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "from", + Usage: "Ingest a dataset bundle directory as a new signed version", + ArgsUsage: "", + Flags: []cli.Flag{&cli.StringFlag{Name: "name", Usage: "Dataset id", Required: true}}, + Action: func(_ context.Context, cmd *cli.Command) error { + if cmd.NArg() != 1 { + return fmt.Errorf("bundle directory required: obol dataset from --name ") + } + return appendDatasetVersion(cfg, cmd, strings.TrimSpace(cmd.String("name")), cmd.Args().First()) + }, + } +} + +func datasetVersionCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "version", + Usage: "Append the next signed version of an existing dataset", + ArgsUsage: "", + Flags: []cli.Flag{&cli.StringFlag{Name: "bundle", Usage: "New bundle directory", Required: true}}, + Action: func(_ context.Context, cmd *cli.Command) error { + if cmd.NArg() != 1 { + return fmt.Errorf("dataset id required: obol dataset version --bundle ") + } + id := strings.TrimSpace(cmd.Args().First()) + if 
_, err := os.Stat(datasetStorePath(cfg, id)); err != nil { + return fmt.Errorf("dataset %q not found — create it with 'obol dataset from'", id) + } + return appendDatasetVersion(cfg, cmd, id, cmd.String("bundle")) + }, + } +} + +func appendDatasetVersion(cfg *config.Config, cmd *cli.Command, id, bundleDir string) error { + u := getUI(cmd) + if id == "" { + return fmt.Errorf("dataset id required") + } + key, err := dataset.LoadOrCreateKey(datasetKeyPath(cfg, id)) + if err != nil { + return err + } + signer := dataset.NewEthSigner(key) + + manifestHash, artifactPath, fileHash, size, err := dataset.ReadBundle(bundleDir) + if err != nil { + return err + } + + store := dataset.NewStore(datasetStorePath(cfg, id)) + st, err := store.Load() + if err != nil { + return err + } + log := dataset.LogFromVersions(st.Versions) + v, err := log.Append(manifestHash, fileHash, size, signer, time.Now()) + if err != nil { + return err + } + + st.ID, st.GroupID, st.Versions = id, id, log.Versions() + if st.Artifacts == nil { + st.Artifacts = map[int]string{} + } + st.Artifacts[v.Seq] = artifactPath + if err := store.Save(st); err != nil { + return err + } + + u.Successf("Dataset %q version %d recorded", id, v.Seq) + u.Infof("Manifest hash: %s", v.ManifestHash) + u.Infof("File hash: %s", v.FileHash) + u.Infof("Size: %d bytes", v.Size) + u.Infof("Owner: %s", signer.SignerID()) + u.Dim("Publish it with: obol dataset publish " + id) + return nil +} + +func datasetPublishCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "publish", + Usage: "Host the dataset's artifact server and expose it over a Cloudflare tunnel", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "membership", Usage: "open | invite", Value: "invite"}, + &cli.IntFlag{Name: "port", Usage: "Local port (0 = pick a free one)", Value: 0}, + &cli.BoolFlag{Name: "no-tunnel", Usage: "Serve locally only"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if 
cmd.NArg() != 1 { + return fmt.Errorf("dataset id required: obol dataset publish ") + } + id := strings.TrimSpace(cmd.Args().First()) + + key, err := dataset.LoadOrCreateKey(datasetKeyPath(cfg, id)) + if err != nil { + return err + } + signer := dataset.NewEthSigner(key) + + store := dataset.NewStore(datasetStorePath(cfg, id)) + st, err := store.Load() + if err != nil { + return err + } + if len(st.Versions) == 0 { + return fmt.Errorf("dataset %q has no versions — run 'obol dataset from' first", id) + } + artifacts := dataset.NewFileArtifacts() + for seq, path := range st.Artifacts { + artifacts.Set(seq, path) + } + ents := dataset.NewEntitlements() + ents.Load(st.Entitlements) + + ownerToken, err := randomToken("obol-dataset-owner-") + if err != nil { + return err + } + srv := dataset.NewServer(dataset.Config{ + ID: id, + Membership: cmd.String("membership"), + OwnerToken: ownerToken, + OwnerSigner: signer.SignerID(), + Log: dataset.LogFromVersions(st.Versions), + Ents: ents, + Store: store, + Artifacts: artifacts, + Payments: forwardedPayment{}, + }) + + ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", cmd.Int("port"))) + if err != nil { + return fmt.Errorf("listen: %w", err) + } + localAddr := "http://" + ln.Addr().String() + httpSrv := &http.Server{Handler: srv.Handler()} + go func() { _ = httpSrv.Serve(ln) }() + + publicURL := localAddr + var tunnel *exec.Cmd + if !cmd.Bool("no-tunnel") { + u.Info("Opening Cloudflare tunnel …") + if turl, tcmd, terr := startQuickTunnel(ctx, ln.Addr().String()); terr != nil { + u.Warnf("tunnel failed (%v) — serving locally only at %s", terr, localAddr) + } else { + publicURL, tunnel = turl, tcmd + } + } + + head, _ := dataset.LogFromVersions(st.Versions).Head() + _ = writeDatasetState(cfg, datasetState{ID: id, LocalAddr: localAddr, PublicURL: publicURL, OwnerToken: ownerToken}) + + u.Successf("Dataset %q published (head version %d)", id, head.Seq) + u.Infof("Public URL: %s", publicURL) + u.Infof("Owner: %s", 
signer.SignerID()) + u.Infof("Membership: %s", cmd.String("membership")) + u.Blank() + u.Bold("Buyers fetch with:") + u.Printf(" obol buy dataset %s --id %s --member-token ", publicURL, id) + if cmd.String("membership") == dataset.MembershipInvite { + u.Dim("Admit a worker's printed code: obol dataset approve ") + } + u.Dim("Ctrl-C to stop.") + + sigCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM) + defer stop() + <-sigCtx.Done() + + u.Info("Stopping …") + shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = httpSrv.Shutdown(shutCtx) + if tunnel != nil && tunnel.Process != nil { + _ = tunnel.Process.Kill() + } + _ = removeDatasetState(cfg, id) + return nil + }, + } +} + +func datasetApproveCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "approve", + Usage: "Admit a worker to a dataset (the membership decision)", + ArgsUsage: "", + Flags: []cli.Flag{&cli.StringFlag{Name: "dataset", Usage: "Dataset id (defaults to the only running one)"}}, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("user code required: obol dataset approve ") + } + st, err := loadDatasetState(cfg, cmd.String("dataset")) + if err != nil { + return err + } + body, _ := json.Marshal(map[string]string{"user_code": cmd.Args().First()}) + resp, err := datasetOwnerReq(ctx, st, http.MethodPost, "/auth/device/approve", body) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("approve failed (%d): %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + u.Successf("Approved %s into %q", cmd.Args().First(), st.ID) + return nil + }, + } +} + +func datasetVerifyCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "verify", + Usage: "Walk a dataset's signed version chain (offline)", + ArgsUsage: "", + Action: func(_ 
context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("dataset id required: obol dataset verify ") + } + id := strings.TrimSpace(cmd.Args().First()) + key, err := dataset.LoadOrCreateKey(datasetKeyPath(cfg, id)) + if err != nil { + return err + } + owner := dataset.NewEthSigner(key).SignerID() + st, err := dataset.NewStore(datasetStorePath(cfg, id)).Load() + if err != nil { + return err + } + log := dataset.LogFromVersions(st.Versions) + if err := log.Verify(dataset.EthVerifier{}, owner); err != nil { + return fmt.Errorf("chain INVALID: %w", err) + } + head, _ := log.Head() + u.Successf("Chain valid: %d version(s), head v%d, owner %s", log.Len(), head.Seq, owner) + return nil + }, + } +} + +func datasetStatusCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "status", + Usage: "Show versions and member count", + ArgsUsage: "", + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("dataset id required: obol dataset status ") + } + id := strings.TrimSpace(cmd.Args().First()) + st, err := dataset.NewStore(datasetStorePath(cfg, id)).Load() + if err != nil { + return err + } + log := dataset.LogFromVersions(st.Versions) + u.Bold(fmt.Sprintf("Dataset %s — %d version(s)", id, log.Len())) + for _, v := range log.Versions() { + u.Printf(" v%d %s %d bytes (%s…)", v.Seq, v.ManifestHash[:12], v.Size, v.Signature[:12]) + } + u.Infof("Entitled members: %d", len(st.Entitlements)) + return nil + }, + } +} + +// buyDatasetCommand is added to `obol buy` so buyers fetch with +// `obol buy dataset --id --member-token `. 
+func buyDatasetCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "dataset", + Usage: "Download a versioned dataset over HTTP, verifying its whole-file hash", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "id", Usage: "Dataset id (or embed /dataset/ in the URL)"}, + &cli.IntFlag{Name: "version", Usage: "Version to fetch (0 = head)"}, + &cli.StringFlag{Name: "member-token", Usage: "Member token (owner-issued or payment-minted)", Required: true}, + &cli.StringFlag{Name: "out", Usage: "Output file (default -v.jsonl)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("seller URL required: obol buy dataset --id --member-token ") + } + base, id := splitDatasetURL(cmd.Args().First(), cmd.String("id")) + if id == "" { + return fmt.Errorf("dataset id required (pass --id or a /dataset/ URL)") + } + out := cmd.String("out") + if out == "" { + v := cmd.Int("version") + if v == 0 { + v = 1 + } + out = fmt.Sprintf("%s-v%d.jsonl", id, v) + } + u.Infof("Fetching %s (version %v) → %s", id, orHead(cmd.Int("version")), out) + res, err := dataset.Fetch(ctx, dataset.FetchOptions{ + BaseURL: base, ID: id, Version: cmd.Int("version"), + Token: cmd.String("member-token"), OutPath: out, + }) + if err != nil { + return err + } + if res.Resumed { + u.Dim("(resumed an interrupted download)") + } + u.Successf("Verified v%d: %d bytes, file hash %s", res.Version, res.Bytes, res.FileHash) + u.Dim("Manifest: " + res.ManifestHash) + return nil + }, + } +} + +// --- payment validation (behind the edge x402-verifier) --- + +// forwardedPayment trusts the edge x402-verifier to have proven a settled +// payment upstream; it extracts the paid version/amount from forwarded +// headers. It is only reachable on the membership-gated /join/paid route +// (never a raw public route). 
+type forwardedPayment struct{} + +func (forwardedPayment) Validate(r *http.Request, _ string) (int, string, error) { + if r.Header.Get("X-Payment-Response") == "" && r.Header.Get("X-Payment") == "" { + return 0, "", fmt.Errorf("no settled payment forwarded") + } + v, _ := strconv.Atoi(r.Header.Get("X-Dataset-Version")) + if v < 1 { + v = 1 + } + return v, r.Header.Get("X-Dataset-Atomic"), nil +} + +// --- state + url helpers --- + +func datasetServeDir(cfg *config.Config) string { return filepath.Join(cfg.ConfigDir, "dataset-serve") } + +func datasetKeyPath(cfg *config.Config, id string) string { + return filepath.Join(datasetServeDir(cfg), id+".key") +} + +func datasetStorePath(cfg *config.Config, id string) string { + return filepath.Join(datasetServeDir(cfg), id+".store.json") +} + +func writeDatasetState(cfg *config.Config, st datasetState) error { + if err := os.MkdirAll(datasetServeDir(cfg), 0o700); err != nil { + return err + } + b, _ := json.MarshalIndent(st, "", " ") + return os.WriteFile(filepath.Join(datasetServeDir(cfg), st.ID+".state.json"), b, 0o600) +} + +func removeDatasetState(cfg *config.Config, id string) error { + return os.Remove(filepath.Join(datasetServeDir(cfg), id+".state.json")) +} + +func loadDatasetState(cfg *config.Config, id string) (datasetState, error) { + dir := datasetServeDir(cfg) + if id != "" { + return readDatasetStateFile(filepath.Join(dir, id+".state.json")) + } + entries, err := os.ReadDir(dir) + if err != nil { + return datasetState{}, fmt.Errorf("no running dataset (publish one first)") + } + var found []string + for _, e := range entries { + if strings.HasSuffix(e.Name(), ".state.json") { + found = append(found, e.Name()) + } + } + switch len(found) { + case 0: + return datasetState{}, fmt.Errorf("no running dataset (publish one first)") + case 1: + return readDatasetStateFile(filepath.Join(dir, found[0])) + default: + return datasetState{}, fmt.Errorf("multiple datasets running — pass --dataset ") + } +} + +func 
readDatasetStateFile(path string) (datasetState, error) { + b, err := os.ReadFile(path) + if err != nil { + return datasetState{}, fmt.Errorf("read dataset state: %w", err) + } + var st datasetState + if err := json.Unmarshal(b, &st); err != nil { + return datasetState{}, err + } + return st, nil +} + +func datasetOwnerReq(ctx context.Context, st datasetState, method, path string, body []byte) (*http.Response, error) { + var r io.Reader + if body != nil { + r = strings.NewReader(string(body)) + } + req, _ := http.NewRequestWithContext(ctx, method, st.LocalAddr+path, r) + req.Header.Set("Authorization", "Bearer "+st.OwnerToken) + req.Header.Set("Content-Type", "application/json") + return http.DefaultClient.Do(req) +} + +// splitDatasetURL separates a base URL from an embedded /dataset/ path. +func splitDatasetURL(raw, flagID string) (base, id string) { + raw = strings.TrimRight(raw, "/") + if i := strings.Index(raw, "/dataset/"); i >= 0 { + base = raw[:i] + rest := strings.TrimPrefix(raw[i:], "/dataset/") + id = strings.SplitN(rest, "/", 2)[0] + if flagID != "" { + id = flagID + } + return base, id + } + return raw, flagID +} + +func orHead(v int) any { + if v == 0 { + return "head" + } + return v +} diff --git a/cmd/obol/main.go b/cmd/obol/main.go index d3432da5..e3c33eef 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -321,10 +321,16 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}} updateCommand(cfg), upgradeCommand(cfg), networkCommand(cfg), + nodeCommand(cfg), hermesCommand(cfg), openclawCommand(cfg), sellCommand(cfg), buyCommand(cfg), + skillsCommand(cfg), + bountyCommand(cfg), + smokeCommand(cfg), + researchCommand(cfg), + datasetCommand(cfg), modelCommand(cfg), { Name: "app", diff --git a/cmd/obol/model.go b/cmd/obol/model.go index 729aee07..4ef123e6 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "math/big" - "os" "sort" "strconv" "strings" @@ -67,16 +66,20 @@ func modelSetupCommand(cfg 
*config.Config) *cli.Command { Flags: []cli.Flag{ &cli.StringFlag{ Name: "provider", - Usage: "Provider name: anthropic, openai, or ollama", + Usage: "Provider id (anthropic, openai, ollama, venice, openrouter, nvidia, gmi, novita, huggingface). Run with no flags to pick interactively.", }, &cli.StringFlag{ Name: "api-key", - Usage: "API key for the provider", + Usage: "API key for the provider (BYOK; also read from the provider's env var if set)", Sources: cli.EnvVars("LLM_API_KEY"), }, &cli.StringSliceFlag{ Name: "model", - Usage: "Model(s) to configure (e.g. claude-sonnet-4-5-20250929, gpt-4o)", + Usage: "Model(s) to configure (e.g. claude-sonnet-4-6, gpt-5.5, or an aggregator model id)", + }, + &cli.BoolFlag{ + Name: "free", + Usage: "Seed only the provider's curated free-tier models (OpenRouter)", }, }, Commands: []*cli.Command{ @@ -120,15 +123,17 @@ func modelSetupCommand(cfg *config.Config) *cli.Command { } } - // Provider-specific flow - switch provider { - case "ollama": + // Provider-specific flow — dispatch off the registry, not a + // hardcoded switch. Ollama is local; everything else is a + // key-based cloud/BYOK provider handled by one generic path. 
+ prof, ok := model.ProviderByID(provider) + if !ok { + return fmt.Errorf("unknown provider %q — run `obol model setup` (no flags) to pick from the list, or `obol model setup custom --endpoint … --model …` for an unlisted OpenAI-compatible endpoint", provider) + } + if prof.ID == model.ProviderOllama { return setupOllama(cfg, u, models) - case "anthropic", "openai": - return setupCloudProvider(cfg, u, provider, apiKey, models) - default: - return fmt.Errorf("unknown provider %q — use anthropic, openai, or ollama", provider) } + return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free")) }, } } @@ -187,13 +192,14 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error { return promoteAndSync(cfg, u, explicit) } -func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, models []string) error { +func setupCloudProvider(cfg *config.Config, u *ui.UI, prof model.ProviderInfo, apiKey string, models []string, free bool) error { if apiKey == "" { - var err error - - info := providerInfo(provider) + if prof.SignupURL != "" { + u.Dim(fmt.Sprintf("Get a %s API key: %s", prof.Name, prof.SignupURL)) + } - apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", info.Name, info.EnvVar)) + var err error + apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", prof.Name, prof.EnvVar)) if err != nil { return err } @@ -203,38 +209,34 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m } } - if len(models) == 0 { - // Per-provider defaults — kept in sync with what the providers - // document as their current chat-tuned flagship. Bumping these is a - // small follow-up PR when frontier models drop, and it isolates the - // "what's good today" maintenance to one place. 
- var defaultModel string - switch provider { - case "anthropic": - defaultModel = "claude-sonnet-4-6" - case "openai": - defaultModel = "gpt-5.5" + // --free: seed the provider's curated free-tier models (unless the + // operator already named explicit --model values). + if free { + if len(prof.Free) == 0 { + return fmt.Errorf("--free is not available for %s (no curated free models); pass --model instead", prof.Name) } + if len(models) == 0 { + models = append([]string(nil), prof.Free...) + u.Infof("Seeding %d curated free %s model(s)", len(models), prof.Name) + } + } - // Interactive: let the user override the default with a free-text - // entry. Non-interactive (no TTY): silently use the default — the - // caller can always pass --model to be explicit. - chosen := defaultModel - if defaultModel != "" && u.IsTTY() && !u.IsJSON() { - input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel) - if err != nil { - return err - } - if strings.TrimSpace(input) != "" { - chosen = strings.TrimSpace(input) - } + // Resolve a model when none was given: the registry Default, else (for + // BYOK aggregators with a rotating catalog) the live /v1/models list. + if len(models) == 0 { + chosen, err := resolveSetupModel(u, prof, apiKey) + if err != nil { + return err } if chosen != "" { models = []string{chosen} } } + if len(models) == 0 { + return fmt.Errorf("no model selected for %s — pass --model ", prof.Name) + } - if err := model.ConfigureLiteLLM(cfg, u, provider, apiKey, models); err != nil { + if err := model.ConfigureLiteLLM(cfg, u, prof.ID, apiKey, models); err != nil { u.Print("") u.Print(" Hint: Configuration stored in: litellm-config ConfigMap (llm namespace)") @@ -247,6 +249,58 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m return promoteAndSync(cfg, u, models) } +// resolveSetupModel picks a model when the operator passed none. A registry +// Default wins (overridable in a TTY). 
With no static default — BYOK +// aggregators whose catalog rotates — it lists the live /v1/models endpoint: +// a picker in a TTY, otherwise an error naming real ids so the operator can +// re-run with --model. Returns "" only when there is genuinely nothing to +// pick (the caller then errors). +func resolveSetupModel(u *ui.UI, prof model.ProviderInfo, apiKey string) (string, error) { + if prof.Default != "" { + if u.IsTTY() && !u.IsJSON() { + input, err := u.Input(fmt.Sprintf("Model for %s", prof.ID), prof.Default) + if err != nil { + return "", err + } + if strings.TrimSpace(input) != "" { + return strings.TrimSpace(input), nil + } + } + return prof.Default, nil + } + + if !prof.IsBYOK() { + return "", nil + } + + ids, err := model.FetchOpenAICompatibleModels(prof.BaseURL, apiKey) + if err != nil { + u.Dim(fmt.Sprintf("Couldn't list %s models (%v)", prof.Name, err)) + if u.IsTTY() && !u.IsJSON() { + return u.Input(fmt.Sprintf("Model id for %s", prof.Name), "") + } + return "", fmt.Errorf("could not resolve a model for %s: pass --model (keys/models at %s)", prof.Name, prof.SignupURL) + } + + if u.IsTTY() && !u.IsJSON() { + shown := ids + if len(shown) > 30 { + shown = shown[:30] + } + idx, err := u.Select(fmt.Sprintf("Select a %s model:", prof.Name), shown, 0) + if err != nil { + return "", err + } + return shown[idx], nil + } + + sample := ids + if len(sample) > 8 { + sample = sample[:8] + } + return "", fmt.Errorf("pass --model for %s; available include: %s", prof.Name, strings.Join(sample, ", ")) +} + // syncAgentModels re-renders the stack-managed Hermes default agent from the // current LiteLLM model inventory. 
func syncAgentModels(cfg *config.Config, u *ui.UI) error { @@ -852,17 +906,6 @@ func modelRemoveCommand(cfg *config.Config) *cli.Command { } } -func providerInfo(id string) model.ProviderInfo { - providers, _ := model.GetAvailableProviders(nil) - for _, p := range providers { - if p.ID == id { - return p - } - } - - return model.ProviderInfo{ID: id, Name: id} -} - // detectedCredential describes a credential found in the environment. type detectedCredential struct { key string // the actual API key value (empty for Ollama) @@ -875,22 +918,22 @@ type detectedCredential struct { func detectCredentials() map[string]detectedCredential { creds := make(map[string]detectedCredential) - // Anthropic: check ANTHROPIC_API_KEY, then CLAUDE_CODE_OAUTH_TOKEN - if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { - creds["anthropic"] = detectedCredential{key: key, source: "ANTHROPIC_API_KEY"} - } else if key := os.Getenv("CLAUDE_CODE_OAUTH_TOKEN"); key != "" { - creds["anthropic"] = detectedCredential{key: key, source: "CLAUDE_CODE_OAUTH_TOKEN"} - } - - // OpenAI: check OPENAI_API_KEY - if key := os.Getenv("OPENAI_API_KEY"); key != "" { - creds["openai"] = detectedCredential{key: key, source: "OPENAI_API_KEY"} - } + // Registry-driven: every provider's primary + alternate env vars are + // checked via model.ResolveAPIKey, so a new provider row auto-detects + // without editing this function. Ollama has no key — probe reachability. 
+ providers, _ := model.GetAvailableProviders(nil) + for _, p := range providers { + if p.ID == model.ProviderOllama { + if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 { + creds[p.ID] = detectedCredential{ + source: fmt.Sprintf("%d model(s) available", len(ollamaModels)), + } + } + continue + } - // Ollama: check if reachable with models - if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 { - creds["ollama"] = detectedCredential{ - source: fmt.Sprintf("%d model(s) available", len(ollamaModels)), + if key, envVar := model.ResolveAPIKey(p.ID); key != "" { + creds[p.ID] = detectedCredential{key: key, source: envVar} } } diff --git a/cmd/obol/model_test.go b/cmd/obol/model_test.go index 972ef90d..86f9b7f3 100644 --- a/cmd/obol/model_test.go +++ b/cmd/obol/model_test.go @@ -3,6 +3,9 @@ package main import ( "testing" + "github.com/ObolNetwork/obol-stack/internal/model" + "github.com/urfave/cli/v3" + "github.com/ObolNetwork/obol-stack/internal/config" ) @@ -69,3 +72,46 @@ func TestSetupPromoteList(t *testing.T) { } }) } + +// TestModelSetup_BYOKFlags pins the BYOK onboarding surface: the setup +// command exposes --provider/--api-key/--model/--free, and every BYOK +// aggregator in the registry dispatches through the generic cloud path +// (only Ollama is special-cased). 
+func TestModelSetup_BYOKFlags(t *testing.T) { + cfg := &config.Config{} + var setup *cli.Command + for _, sub := range modelCommand(cfg).Commands { + if sub.Name == "setup" { + setup = sub + } + } + if setup == nil { + t.Fatal("model setup command missing") + } + + want := map[string]bool{"provider": false, "api-key": false, "model": false, "free": false} + for _, f := range setup.Flags { + for _, n := range f.Names() { + if _, ok := want[n]; ok { + want[n] = true + } + } + } + for n, found := range want { + if !found { + t.Errorf("model setup missing --%s flag", n) + } + } + + // The registry must carry the BYOK providers this PR adds. + for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} { + p, ok := model.ProviderByID(id) + if !ok { + t.Errorf("provider %q missing from registry", id) + continue + } + if p.BaseURL == "" { + t.Errorf("provider %q has no BaseURL", id) + } + } +} diff --git a/cmd/obol/node.go b/cmd/obol/node.go new file mode 100644 index 00000000..269af862 --- /dev/null +++ b/cmd/obol/node.go @@ -0,0 +1,102 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/kubectl" + "github.com/ObolNetwork/obol-stack/internal/stack" + "github.com/urfave/cli/v3" +) + +// nodeCommand groups commands for adding and inspecting worker nodes that join +// this stack's cluster. Multi-node only makes sense on the k3s backend — a +// k3d/Docker master's flannel overlay is not routable off-host — so the +// subcommands guard on the active backend. 
+func nodeCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "node", + Usage: "Add and inspect worker nodes that join this stack's cluster (k3s backend)", + Commands: []*cli.Command{ + nodeTokenCommand(cfg), + nodeListCommand(cfg), + }, + } +} + +func nodeTokenCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "token", + Usage: "Print the join command for adding a Linux worker node to this k3s cluster", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "server-url", + Usage: "Override the K3S_URL agents dial (default https://:6443)", + }, + &cli.BoolFlag{Name: "json", Usage: "Output machine-readable JSON"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + + backend, err := stack.LoadBackend(cfg) + if err != nil { + return err + } + + if backend.Name() != stack.BackendK3s { + return fmt.Errorf( + "obol node requires the k3s backend (current backend: %q)\n"+ + "A k3d/Docker master cannot accept remote node joins — its flannel overlay is not routable off-host.\n"+ + "Re-init on a Linux host with: obol stack init --backend k3s", + backend.Name()) + } + + token, err := stack.ReadK3sNodeToken(cfg) + if err != nil { + return err + } + + serverURL := stack.K3sServerURL(cmd.String("server-url")) + version := stack.K3sBinaryVersion(cfg) + joinCmd := stack.K3sAgentJoinCommand(serverURL, token, version) + + if cmd.Bool("json") { + out, _ := json.MarshalIndent(map[string]string{ + "serverUrl": serverURL, + "token": token, + "version": version, + "joinCommand": joinCmd, + }, "", " ") + fmt.Println(string(out)) + + return nil + } + + u.Info("Run this on a Linux worker node to join the cluster:") + fmt.Printf("\n %s\n\n", joinCmd) + u.Detail("Server", serverURL) + u.Dim("Multi-homed / Wi-Fi node? append: --node-ip --flannel-iface ") + u.Dim("GPU node? 
label it at join: --node-label obol.tech/accelerator=nvidia") + + return nil + }, + } +} + +func nodeListCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "list", + Usage: "List cluster nodes with their accelerator labels", + Action: func(ctx context.Context, cmd *cli.Command) error { + if err := kubectl.EnsureCluster(cfg); err != nil { + return err + } + + bin, kc := kubectl.Paths(cfg) + + return kubectl.Run(bin, kc, "get", "nodes", "-o", "wide", "-L", "obol.tech/accelerator") + }, + } +} diff --git a/cmd/obol/research.go b/cmd/obol/research.go new file mode 100644 index 00000000..d9c51585 --- /dev/null +++ b/cmd/obol/research.go @@ -0,0 +1,397 @@ +package main + +// obol research — owner side of a decentralized auto-research program. +// +// obol research publish start the KB + membership server on this +// machine, expose it over a Cloudflare quick +// tunnel, print the public URL workers join. +// obol research approve admit a worker (membership decision). +// obol research status [] roster, results, champion, payouts. +// +// The server is the host gateway (same spirit as `obol sell inference`): it +// runs on the owner's machine, not in the cluster, and reaches remote +// workers over the real internet via Cloudflare — no tailscale, every KB +// route gated by a groupauth member token. + +import ( + "bufio" + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "os" + "os/exec" + "os/signal" + "path/filepath" + "regexp" + "strings" + "syscall" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/research/kb" + "github.com/ObolNetwork/obol-stack/internal/research/server" + "github.com/urfave/cli/v3" +) + +// researchState is persisted so `approve`/`status` can reach a running +// `publish` server. 
+type researchState struct { + Program string `json:"program"` + LocalAddr string `json:"local_addr"` // http://127.0.0.1:PORT + PublicURL string `json:"public_url"` // https://xxx.trycloudflare.com + OwnerToken string `json:"owner_token"` // gates approve/status +} + +func researchCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "research", + Usage: "Run a decentralized auto-research program (publish an ID, admit workers, collect results)", + Commands: []*cli.Command{ + researchPublishCommand(cfg), + researchApproveCommand(cfg), + researchStatusCommand(cfg), + }, + } +} + +func researchPublishCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "publish", + Usage: "Publish a research program: host the KB + membership server and expose it over a Cloudflare tunnel", + ArgsUsage: "", + Description: `Starts the program's knowledge-base + membership server on this machine +and opens a Cloudflare quick tunnel so workers on other obol-stacks can +join over the open internet. Runs in the foreground — Ctrl-C stops the +program. Approve joining workers from another shell with +'obol research approve '.`, + Flags: []cli.Flag{ + &cli.StringFlag{Name: "objective", Usage: "Free-text hypothesis space"}, + &cli.StringFlag{Name: "metric", Usage: "Metric name (e.g. 
val_bpb)", Required: true}, + &cli.StringFlag{Name: "direction", Usage: "minimize | maximize", Value: "minimize"}, + &cli.StringFlag{Name: "accept", Usage: "beats-champion | threshold", Value: "beats-champion"}, + &cli.FloatFlag{Name: "threshold", Usage: "Acceptance threshold (when --accept=threshold)"}, + &cli.FloatFlag{Name: "baseline", Usage: "Reference metric value; first improvement's impact is measured against it"}, + &cli.FloatFlag{Name: "pool", Usage: "Reward pool", Value: 0}, + &cli.StringFlag{Name: "token", Usage: "Reward token", Value: "OBOL"}, + &cli.StringFlag{Name: "network", Usage: "Payment chain", Value: "base-sepolia"}, + &cli.StringFlag{Name: "membership", Usage: "open | invite", Value: "invite"}, + &cli.StringFlag{Name: "split", Usage: "by-impact | champion-takes-all", Value: "by-impact"}, + &cli.IntFlag{Name: "port", Usage: "Local port (0 = pick a free one)", Value: 0}, + &cli.BoolFlag{Name: "no-tunnel", Usage: "Serve locally only (skip the Cloudflare tunnel)"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("program name required: obol research publish ") + } + name := strings.TrimSpace(cmd.Args().First()) + + prog, err := programFromFlags(name, cmd) + if err != nil { + return err + } + + ownerToken, err := randomToken("obol-research-owner-") + if err != nil { + return err + } + srv := server.New(prog, cmd.String("membership"), ownerToken, nil) + + ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", cmd.Int("port"))) + if err != nil { + return fmt.Errorf("listen: %w", err) + } + localAddr := "http://" + ln.Addr().String() + httpSrv := &http.Server{Handler: srv.Handler()} + go func() { _ = httpSrv.Serve(ln) }() + + publicURL := localAddr + var tunnel *exec.Cmd + if !cmd.Bool("no-tunnel") { + u.Info("Opening Cloudflare tunnel …") + turl, tcmd, terr := startQuickTunnel(ctx, ln.Addr().String()) + if terr != nil { + u.Warnf("tunnel failed (%v) — serving locally 
only at %s", terr, localAddr) + } else { + publicURL = turl + tunnel = tcmd + } + } + + st := researchState{Program: name, LocalAddr: localAddr, PublicURL: publicURL, OwnerToken: ownerToken} + if err := writeResearchState(cfg, st); err != nil { + u.Warnf("could not persist program state (approve/status from another shell may not work): %v", err) + } + + u.Successf("Research program %q published", name) + u.Infof("Public URL: %s", publicURL) + u.Infof("Metric: %s (%s, %s)", prog.Criteria.Metric, prog.Criteria.Direction, prog.Criteria.Accept) + u.Infof("Membership: %s", cmd.String("membership")) + u.Blank() + u.Bold("Workers join with:") + u.Printf(" python3 worker.py --kb %s --program %s --worker ", publicURL, name) + if cmd.String("membership") == server.MembershipInvite { + u.Dim("Approve each worker's printed code: obol research approve ") + } + u.Blank() + u.Dim("Ctrl-C to stop the program.") + + // Run until interrupted. + sigCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM) + defer stop() + <-sigCtx.Done() + + u.Info("Stopping …") + shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _ = httpSrv.Shutdown(shutCtx) + if tunnel != nil && tunnel.Process != nil { + _ = tunnel.Process.Kill() + } + _ = removeResearchState(cfg, name) + return nil + }, + } +} + +func researchApproveCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "approve", + Usage: "Admit a worker to the program (the membership decision)", + ArgsUsage: "", + Flags: []cli.Flag{&cli.StringFlag{Name: "program", Usage: "Program name (defaults to the only running one)"}}, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("user code required: obol research approve ") + } + st, err := loadResearchState(cfg, cmd.String("program")) + if err != nil { + return err + } + body, _ := json.Marshal(map[string]string{"user_code": cmd.Args().First()}) + resp, 
err := ownerPost(ctx, st, "/auth/device/approve", body) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("approve failed (%d): %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + u.Successf("Approved %s into %q", cmd.Args().First(), st.Program) + return nil + }, + } +} + +func researchStatusCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "status", + Usage: "Show roster, results, champion, and payouts", + ArgsUsage: "[]", + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + st, err := loadResearchState(cfg, cmd.Args().First()) + if err != nil { + return err + } + resp, err := ownerGet(ctx, st, "/status") + if err != nil { + return err + } + defer resp.Body.Close() + var out struct { + Program kb.Program `json:"program"` + Roster []string `json:"roster"` + Results []kb.Result `json:"results"` + Champion *kb.Result `json:"champion"` + Payouts map[string]float64 `json:"payouts"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return err + } + u.Bold(fmt.Sprintf("Program %s — %s (%s)", out.Program.ID, out.Program.Criteria.Metric, out.Program.Criteria.Direction)) + u.Infof("Members: %s", strings.Join(out.Roster, ", ")) + if out.Champion != nil { + u.Successf("Champion: %s = %.6f (worker %s)", out.Program.Criteria.Metric, out.Champion.Value, out.Champion.Worker) + } else { + u.Dim("Champion: none yet") + } + for _, r := range out.Results { + mark := "·" + if r.Champion { + mark = "★" + } else if r.Accepted { + mark = "+" + } + u.Printf(" %s #%d %-10s %.6f impact %.6f", mark, r.Seq, r.Worker, r.Value, r.Impact) + } + if len(out.Payouts) > 0 { + u.Blank() + u.Bold("Payouts (" + string(out.Program.Split) + "):") + for w, amt := range out.Payouts { + u.Printf(" %-10s %.6f %s", w, amt, out.Program.Token) + } + } + return nil + }, + } +} + +// --- helpers --- + +func programFromFlags(name 
string, cmd *cli.Command) (kb.Program, error) { + dir := kb.Direction(cmd.String("direction")) + if dir != kb.Minimize && dir != kb.Maximize { + return kb.Program{}, fmt.Errorf("--direction must be minimize or maximize") + } + acc := kb.AcceptMode(cmd.String("accept")) + if acc != kb.BeatsChampion && acc != kb.Threshold { + return kb.Program{}, fmt.Errorf("--accept must be beats-champion or threshold") + } + split := kb.SplitMode(cmd.String("split")) + if split != kb.ByImpact && split != kb.ChampionTakesAll { + return kb.Program{}, fmt.Errorf("--split must be by-impact or champion-takes-all") + } + p := kb.Program{ + ID: name, + Objective: cmd.String("objective"), + Criteria: kb.Criteria{Metric: cmd.String("metric"), Direction: dir, Accept: acc}, + Pool: cmd.Float("pool"), + Token: cmd.String("token"), + Network: cmd.String("network"), + Split: split, + } + if cmd.IsSet("threshold") { + t := cmd.Float("threshold") + p.Criteria.Threshold = &t + } + if cmd.IsSet("baseline") { + b := cmd.Float("baseline") + p.Baseline = &b + } + return p, nil +} + +func randomToken(prefix string) (string, error) { + b := make([]byte, 24) + if _, err := rand.Read(b); err != nil { + return "", err + } + return prefix + hex.EncodeToString(b), nil +} + +var trycloudflareRe = regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) + +// startQuickTunnel launches `cloudflared tunnel --url http://addr` and +// returns the public trycloudflare URL once it appears on stderr. 
+func startQuickTunnel(ctx context.Context, addr string) (string, *exec.Cmd, error) { + bin, err := exec.LookPath("cloudflared") + if err != nil { + if _, statErr := os.Stat("/opt/homebrew/bin/cloudflared"); statErr == nil { + bin = "/opt/homebrew/bin/cloudflared" + } else { + return "", nil, fmt.Errorf("cloudflared not found") + } + } + c := exec.CommandContext(ctx, bin, "tunnel", "--no-autoupdate", "--url", "http://"+addr) + stderr, err := c.StderrPipe() + if err != nil { + return "", nil, err + } + if err := c.Start(); err != nil { + return "", nil, err + } + urlCh := make(chan string, 1) + go func() { + sc := bufio.NewScanner(stderr) + for sc.Scan() { + if m := trycloudflareRe.FindString(sc.Text()); m != "" { + urlCh <- m + // keep draining so the pipe doesn't block cloudflared + for sc.Scan() { + } + return + } + } + }() + select { + case u := <-urlCh: + return u, c, nil + case <-time.After(40 * time.Second): + _ = c.Process.Kill() + return "", nil, fmt.Errorf("timed out waiting for tunnel URL") + } +} + +func researchStateDir(cfg *config.Config) string { return filepath.Join(cfg.ConfigDir, "research") } + +func writeResearchState(cfg *config.Config, st researchState) error { + dir := researchStateDir(cfg) + if err := os.MkdirAll(dir, 0o700); err != nil { + return err + } + b, _ := json.MarshalIndent(st, "", " ") + return os.WriteFile(filepath.Join(dir, st.Program+".json"), b, 0o600) +} + +func removeResearchState(cfg *config.Config, name string) error { + return os.Remove(filepath.Join(researchStateDir(cfg), name+".json")) +} + +// loadResearchState reads the named program's state, or the only one if name +// is empty. 
+func loadResearchState(cfg *config.Config, name string) (researchState, error) { + dir := researchStateDir(cfg) + if name != "" { + return readResearchStateFile(filepath.Join(dir, name+".json")) + } + entries, err := os.ReadDir(dir) + if err != nil { + return researchState{}, fmt.Errorf("no running program (publish one first)") + } + var found []string + for _, e := range entries { + if strings.HasSuffix(e.Name(), ".json") { + found = append(found, e.Name()) + } + } + if len(found) == 0 { + return researchState{}, fmt.Errorf("no running program (publish one first)") + } + if len(found) > 1 { + return researchState{}, fmt.Errorf("multiple programs running — pass the name") + } + return readResearchStateFile(filepath.Join(dir, found[0])) +} + +func readResearchStateFile(path string) (researchState, error) { + b, err := os.ReadFile(path) + if err != nil { + return researchState{}, fmt.Errorf("read program state: %w", err) + } + var st researchState + if err := json.Unmarshal(b, &st); err != nil { + return researchState{}, err + } + return st, nil +} + +func ownerPost(ctx context.Context, st researchState, path string, body []byte) (*http.Response, error) { + req, _ := http.NewRequestWithContext(ctx, http.MethodPost, st.LocalAddr+path, strings.NewReader(string(body))) + req.Header.Set("Authorization", "Bearer "+st.OwnerToken) + req.Header.Set("Content-Type", "application/json") + return http.DefaultClient.Do(req) +} + +func ownerGet(ctx context.Context, st researchState, path string) (*http.Response, error) { + req, _ := http.NewRequestWithContext(ctx, http.MethodGet, st.LocalAddr+path, nil) + req.Header.Set("Authorization", "Bearer "+st.OwnerToken) + return http.DefaultClient.Do(req) +} diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 6e1287c7..fed6d1e0 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -16,6 +16,7 @@ import ( "os/exec" "os/signal" "path/filepath" + "regexp" "runtime" "strconv" "strings" @@ -53,6 +54,7 @@ func sellCommand(cfg 
*config.Config) *cli.Command { sellHTTPCommand(cfg), sellMCPCommand(cfg), sellAgentCommand(cfg), + sellSkillCommand(cfg), sellDemoCommand(cfg), sellListCommand(cfg), sellStatusCommand(cfg), @@ -86,6 +88,68 @@ func payToFlag(usage string) *cli.StringFlag { } } +// Payment-method selector values for the --pay-with flag. +const ( + payMethodCrypto = "crypto" + payMethodCard = "card" +) + +var ( + // stripeAccountRe matches a Stripe account id (e.g. acct_1A2b3C4d). + stripeAccountRe = regexp.MustCompile(`^acct_[A-Za-z0-9]+$`) + // currencyRe matches a lower-case ISO-4217 currency code (e.g. usd). + currencyRe = regexp.MustCompile(`^[a-z]{3}$`) +) + +// normalizePayWith lower-cases/trims the --pay-with value and defaults an +// empty value to crypto so existing flag-free invocations are unchanged. +func normalizePayWith(v string) string { + v = strings.ToLower(strings.TrimSpace(v)) + if v == "" { + return payMethodCrypto + } + return v +} + +// resolveCardPayment validates the card flags and returns the +// spec.payment map for an MPP credit-card (Stripe) ServiceOffer. It is the +// card analog of the crypto wallet/chain/asset resolution in the sell +// actions: instead of a chain + 0x payTo it emits method=card plus a card +// block carrying the Stripe destination account and currency. +func resolveCardPayment(cmd *cli.Command, price map[string]any) (map[string]any, error) { + account := strings.TrimSpace(cmd.String("stripe-account")) + if account == "" { + return nil, fmt.Errorf("--stripe-account is required with --pay-with card (the acct_... 
that receives card funds)") + } + if !stripeAccountRe.MatchString(account) { + return nil, fmt.Errorf("invalid --stripe-account %q: expected a Stripe account id like acct_1A2b3C4d", account) + } + currency := strings.ToLower(strings.TrimSpace(cmd.String("card-currency"))) + if currency == "" { + currency = "usd" + } + if !currencyRe.MatchString(currency) { + return nil, fmt.Errorf("invalid --card-currency %q: expected a 3-letter ISO-4217 code like usd", currency) + } + card := map[string]any{ + "provider": "stripe", + "account": account, + "currency": currency, + } + // The Stripe "machine payments" network id is advertised in the 402 + // challenge so MPP card clients can mint a Shared Payment Token. Defaults + // from the STRIPE_NETWORK_ID env var. + if networkID := strings.TrimSpace(cmd.String("stripe-network-id")); networkID != "" { + card["networkId"] = networkID + } + return map[string]any{ + "method": payMethodCard, + "card": card, + "maxTimeoutSeconds": cmd.Int("max-timeout"), + "price": price, + }, nil +} + // --------------------------------------------------------------------------- // sell inference — start a local x402 gateway for LLM inference // --------------------------------------------------------------------------- @@ -602,6 +666,10 @@ Examples: Name: "facilitator", Usage: "x402 facilitator URL (verify/settle)", }, + &cli.StringFlag{ + Name: "bounty-reports-dir", + Usage: "Directory serving ServiceBounty A2UI reports via the free bounty_report tool (default: $OBOL_DATA_DIR/bounty-reports)", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -624,18 +692,24 @@ Examples: return err } + reportsDir := cmd.String("bounty-reports-dir") + if reportsDir == "" { + reportsDir = filepath.Join(cfg.DataDir, "bounty-reports") + } + u.Infof("Starting paid MCP server %q on port %d (Ctrl-C to stop)", name, cmd.Int("port")) return x402mcp.Serve(ctx, x402mcp.Options{ - Name: name, - ToolName: cmd.String("tool-name"), - 
Description: cmd.String("description"), - Port: cmd.Int("port"), - PayTo: payTo, - Price: cmd.String("price"), - Chain: cmd.String("chain"), - FacilitatorURL: facilitator, - Upstream: cmd.String("upstream"), - UpstreamHeaders: headers, + Name: name, + ToolName: cmd.String("tool-name"), + Description: cmd.String("description"), + Port: cmd.Int("port"), + PayTo: payTo, + Price: cmd.String("price"), + Chain: cmd.String("chain"), + FacilitatorURL: facilitator, + Upstream: cmd.String("upstream"), + UpstreamHeaders: headers, + BountyReportsDir: reportsDir, }) }, } @@ -706,6 +780,25 @@ Examples: Usage: "Target namespace for the ServiceOffer", Value: "default", }, + &cli.StringFlag{ + Name: "pay-with", + Usage: "Payment method: 'crypto' (x402 on-chain stablecoin, default) or 'card' (MPP Stripe credit card)", + Value: payMethodCrypto, + }, + &cli.StringFlag{ + Name: "stripe-account", + Usage: "Stripe destination account id (acct_...) that receives card funds — required with --pay-with card (card analog of --pay-to)", + }, + &cli.StringFlag{ + Name: "card-currency", + Usage: "ISO-4217 currency for card charges", + Value: "usd", + }, + &cli.StringFlag{ + Name: "stripe-network-id", + Usage: "Stripe \"machine payments\" network id advertised in the 402 challenge (so MPP card clients can mint a Shared Payment Token)", + Sources: cli.EnvVars("STRIPE_NETWORK_ID"), + }, &cli.StringFlag{ Name: "upstream", Usage: "Upstream service name", @@ -840,32 +933,17 @@ Examples: return err } - // Auto-discover wallet from remote-signer if not set. 
- wallet := cmd.String("pay-to") - if wallet == "" { - if resolved, err := hermes.ResolveWalletAddress(cfg); err == nil { - wallet = resolved - u.Infof("Using wallet from remote-signer: %s", wallet) - } else if u.IsTTY() { - var inputErr error - wallet, inputErr = u.Input("Wallet address (payment recipient)", "") - if inputErr != nil || wallet == "" { - return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") - } - } else { - return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") - } - } - if err := x402verifier.ValidateWallet(wallet); err != nil { - return err - } - - // Ensure the x402-verifier CA bundle is populated so TLS verification of - // the facilitator works. This is a no-op if already populated. Non-fatal. - x402verifier.PopulateCABundle(cfg) - ns := cmd.String("namespace") + payWith := normalizePayWith(cmd.String("pay-with")) + if payWith != payMethodCrypto && payWith != payMethodCard { + return fmt.Errorf("--pay-with must be %q or %q, got %q", payMethodCrypto, payMethodCard, cmd.String("pay-with")) + } + isCard := payWith == payMethodCard + // wallet is the crypto payTo recipient; resolved in the crypto + // branch below and left empty for card offers. + var wallet string + if cmd.String("upstream") == "" { return fmt.Errorf("upstream service name required: use --upstream \n\n Example: obol sell http %s --upstream my-svc --port 8080 --pay-to 0x... --chain base-sepolia --price 0.001", name) } @@ -889,10 +967,59 @@ Examples: price["perHour"] = priceTable.PerHour } - chainName := cmd.String("chain") - assetTerms, err := resolveAssetTerms(cmd, &chainName) - if err != nil { - return err + // Resolve the payment block per the selected method. 
+ var ( + payment map[string]any + assetTerms schemas.AssetTerms // crypto only; stays zero for card + ) + switch payWith { + case payMethodCard: + payment, err = resolveCardPayment(cmd, price) + if err != nil { + return err + } + u.Infof("Selling via credit card (Stripe account %s, %s)", + cmd.String("stripe-account"), strings.ToLower(cmd.String("card-currency"))) + default: // payMethodCrypto + // Auto-discover wallet from remote-signer if not set. + wallet = cmd.String("pay-to") + if wallet == "" { + if resolved, rerr := hermes.ResolveWalletAddress(cfg); rerr == nil { + wallet = resolved + u.Infof("Using wallet from remote-signer: %s", wallet) + } else if u.IsTTY() { + var inputErr error + wallet, inputErr = u.Input("Wallet address (payment recipient)", "") + if inputErr != nil || wallet == "" { + return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") + } + } else { + return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") + } + } + if err := x402verifier.ValidateWallet(wallet); err != nil { + return err + } + // Ensure the x402-verifier CA bundle is populated so TLS + // verification of the facilitator works. No-op if already + // populated. Non-fatal. 
+ x402verifier.PopulateCABundle(cfg) + + chainName := cmd.String("chain") + assetTerms, err = resolveAssetTerms(cmd, &chainName) + if err != nil { + return err + } + payment = map[string]any{ + "scheme": "exact", + "network": chainName, + "payTo": wallet, + "maxTimeoutSeconds": cmd.Int("max-timeout"), + "price": price, + } + if !assetTerms.IsZero() { + payment["asset"] = assetTerms + } } spec := map[string]any{ @@ -903,16 +1030,7 @@ Examples: "port": cmd.Int("port"), "healthPath": cmd.String("health-path"), }, - "payment": map[string]any{ - "scheme": "exact", - "network": chainName, - "payTo": wallet, - "maxTimeoutSeconds": cmd.Int("max-timeout"), - "price": price, - }, - } - if !assetTerms.IsZero() { - spec["payment"].(map[string]any)["asset"] = assetTerms + "payment": payment, } if path := cmd.String("path"); path != "" { @@ -941,21 +1059,33 @@ Examples: prov.Framework, prov.MetricName, prov.MetricValue, prov.ParamCount) } - reg, registerEnabled, err := buildSellRegistrationConfig(name, sellRegistrationInput{ - NoRegister: cmd.Bool("no-register"), - Register: cmd.Bool("register"), - Name: cmd.String("register-name"), - Description: cmd.String("description"), - Image: cmd.String("register-image"), - Skills: cmd.StringSlice("register-skills"), - Domains: cmd.StringSlice("register-domains"), - MetadataPairs: cmd.StringSlice("register-metadata"), - }) - if err != nil { - return err - } - if registerEnabled { - spec["registration"] = reg + // ERC-8004 registration is an on-chain identity step and only + // applies to crypto offers. Card offers publish the payment-gated + // route without registration. 
+ var registerEnabled bool + if isCard { + if cmd.Bool("register") { + return fmt.Errorf("ERC-8004 registration is not supported for --pay-with card yet; re-run with --no-register") + } + u.Info("Card offers are not ERC-8004 registered (no on-chain identity); publishing the payment-gated route only.") + } else { + reg, enabled, rerr := buildSellRegistrationConfig(name, sellRegistrationInput{ + NoRegister: cmd.Bool("no-register"), + Register: cmd.Bool("register"), + Name: cmd.String("register-name"), + Description: cmd.String("description"), + Image: cmd.String("register-image"), + Skills: cmd.StringSlice("register-skills"), + Domains: cmd.StringSlice("register-domains"), + MetadataPairs: cmd.StringSlice("register-metadata"), + }) + if rerr != nil { + return rerr + } + registerEnabled = enabled + if registerEnabled { + spec["registration"] = reg + } } // When registration is enabled, the serviceoffer-controller reads the @@ -2811,10 +2941,30 @@ func sellDeleteCommand(cfg *config.Config) *cli.Command { // controller renders an active:false / x402Support:false tombstone // document while keeping the agentId. + // For type=skill offers the bundle ConfigMap is CLI/agent-created + // (not controller-owned, so no ownerRef GC). Capture its name + // before the offer disappears and delete it afterwards. 
+ bundleCM := "" + { + bin, kubeconfig := kubectl.Paths(cfg) + if out, err := kubectl.Output(bin, kubeconfig, "get", "serviceoffers.obol.org", name, "-n", ns, + "-o", "jsonpath={.spec.type}/{.spec.skill.bundleConfigMap}"); err == nil { + if typ, cm, ok := strings.Cut(strings.TrimSpace(out), "/"); ok && typ == "skill" && cm != "" { + bundleCM = cm + } + } + } + if err := kubectlRun(cfg, "delete", "serviceoffers.obol.org", name, "-n", ns); err != nil { return err } + if bundleCM != "" { + if err := kubectlRun(cfg, "delete", "configmap", bundleCM, "-n", ns, "--ignore-not-found"); err != nil { + u.Warnf("could not delete skill bundle ConfigMap %s/%s: %v", ns, bundleCM, err) + } + } + // Drop the offer's manifest from the resume ledger so the next // `obol stack up` / `obol sell resume` doesn't replay an offer // the operator just deleted. Covers every ledger-persisted type @@ -4793,7 +4943,10 @@ func resumePersistedServiceOffers(cfg *config.Config, u *ui.UI) error { u.Blank() u.Infof("Resuming %d locally-persisted sell offer(s)...", len(manifests)) for _, m := range manifests { - if err := kubectlApply(cfg, m.Manifest); err != nil { + // resumeApplyManifest (sell_skill.go) routes ConfigMap items in + // List bundles through server-side apply; skill bundle payloads + // overflow the client-side last-applied annotation otherwise. + if err := resumeApplyManifest(cfg, m.Manifest); err != nil { u.Warnf("resume %s %s/%s: %v", m.label(), m.Namespace, m.Name, err) continue } diff --git a/cmd/obol/sell_skill.go b/cmd/obol/sell_skill.go new file mode 100644 index 00000000..f47453d1 --- /dev/null +++ b/cmd/obol/sell_skill.go @@ -0,0 +1,570 @@ +package main + +// obol sell skill — sell a skill (SKILL.md + scripts bundle) as one +// sellable + ratable unit. +// +// Pack the skill directory into a deterministic gzipped tarball, store +// it in a ConfigMap, and publish a ServiceOffer of type=skill. 
The +// serviceoffer-controller renders a restricted-PSS busybox bundle server +// from the ConfigMap and gates /services//* behind x402; buyers +// download bundle.tar.gz with a one-shot paid request and can verify the +// sha256 offline and against the seller's ERC-8004 metadata anchor. +// +// To sell a skill's *execution* rather than its bytes, gate the agent +// that carries it with the existing agent path: `obol agent new +// --skills ` then `obol sell agent `. + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/embed" + "github.com/ObolNetwork/obol-stack/internal/hermes" + "github.com/ObolNetwork/obol-stack/internal/kubectl" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/schemas" + "github.com/ObolNetwork/obol-stack/internal/skillpkg" + "github.com/ObolNetwork/obol-stack/internal/tunnel" + "github.com/ObolNetwork/obol-stack/internal/ui" + "github.com/ObolNetwork/obol-stack/internal/validate" + x402verifier "github.com/ObolNetwork/obol-stack/internal/x402" + "github.com/urfave/cli/v3" +) + +// skillBundleConfigMapSuffix names the operator-owned ConfigMap that +// carries the gzipped bundle bytes: "-skill-bundle". Distinct +// from monetizeapi.SkillBundleWorkloadName ("so--bundle"), which +// names the controller-rendered bundle-server children. +const skillBundleConfigMapSuffix = "-skill-bundle" + +var ( + // skillNameRe mirrors the CRD pattern on spec.skill.name. + skillNameRe = regexp.MustCompile(`^[a-z0-9][a-z0-9-]*$`) + // skillVersionRe mirrors the CRD pattern on spec.skill.version. 
+ skillVersionRe = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`) +) + +func skillBundleConfigMapName(offerName string) string { + return offerName + skillBundleConfigMapSuffix +} + +func sellSkillCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "skill", + Usage: "Sell a skill bundle (SKILL.md + scripts) as a paid download", + ArgsUsage: "", + Description: `Packages a skill directory into a deterministic gzipped tarball and +publishes it behind an x402 payment gate as a ServiceOffer of +type=skill. The bundle's sha256 is pinned in the offer, surfaced in the +402 response (extra.skill), and can be anchored on the ERC-8004 Identity +Registry with ` + "`obol skills calldata set-hash`" + `. + +To sell a skill's execution rather than its bytes, gate the agent that +carries it: ` + "`obol agent new --skills `" + ` then +` + "`obol sell agent `" + `. + +Examples: + obol sell skill quant-notes --from ./skills/quant-notes --skill-version 0.1.0 \ + --per-request 0.25 --chain base --pay-to 0x... + obol sell skill buy-x402 --from-embedded buy-x402 --skill-version 0.1.0 --price 0.05`, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "from", + Usage: "Directory containing the skill to package (must contain SKILL.md)", + }, + &cli.StringFlag{ + Name: "from-embedded", + Usage: "Name of an embedded obol skill to package (mutually exclusive with --from)", + }, + &cli.StringFlag{ + Name: "skill-name", + Usage: "Skill name for the @ ref (default: the embedded skill name with --from-embedded, otherwise the offer name)", + }, + &cli.StringFlag{ + Name: "skill-version", + Usage: "Skill version for the @ ref (e.g. 0.1.0)", + Required: true, + }, + &cli.StringFlag{ + Name: "display-name", + Usage: "Human-friendly display name for catalog surfaces", + }, + &cli.StringFlag{ + Name: "description", + Aliases: []string{"register-description"}, + Usage: "Human-readable description. 
Surfaced on the 402 payment page, in the storefront catalog, and on the ERC-8004 registration document.", + }, + payToFlag("Payment recipient address"), + &cli.StringFlag{ + Name: "chain", + Usage: "Payment chain (base, base-sepolia, ethereum)", + Value: "base", + }, + &cli.StringFlag{ + Name: "token", + Usage: "Payment token (USDC, OBOL)", + Value: "USDC", + }, + &cli.StringFlag{ + Name: "price", + Usage: "Per-request price in the selected payment token (one paid request = one bundle download)", + }, + &cli.StringFlag{ + Name: "per-request", + Usage: "Per-request price (alias for --price)", + }, + &cli.StringFlag{ + Name: "path", + Usage: "URL path prefix (default: /services/)", + }, + &cli.IntFlag{ + Name: "max-timeout", + Usage: "Payment validity window in seconds", + Value: 300, + }, + &cli.StringFlag{ + Name: "namespace", + Aliases: []string{"n"}, + Usage: "Namespace for the ServiceOffer AND the bundle ConfigMap (must match — the controller reads the ConfigMap from the offer's namespace)", + Value: "default", + }, + &cli.BoolFlag{ + Name: "no-register", + Usage: "Skip ERC-8004 registration metadata. Useful for local dev.", + }, + &cli.StringFlag{ + Name: "register-name", + Usage: "Agent name for ERC-8004 registration (defaults to the offer name)", + }, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + u := getUI(cmd) + if cmd.NArg() != 1 { + return fmt.Errorf("offer name required: obol sell skill (--from | --from-embedded )") + } + name := strings.TrimSpace(cmd.Args().First()) + if err := validate.Name(name); err != nil { + return err + } + + version := strings.TrimSpace(cmd.String("skill-version")) + if !skillVersionRe.MatchString(version) || len(version) > 64 { + return fmt.Errorf("invalid --skill-version %q: must match %s (max 64 chars), e.g. 
0.1.0", version, skillVersionRe) + } + + price := strings.TrimSpace(cmd.String("price")) + if price == "" { + price = strings.TrimSpace(cmd.String("per-request")) + } + if price == "" { + return fmt.Errorf("price required: use --price or --per-request (skills are priced per request — one paid request, one download)") + } + + return runSellSkillShare(ctx, cfg, u, cmd, name, version, price) + }, + } +} + +// runSellSkillShare is SHARE mode: pack → ConfigMap → type=skill offer. +func runSellSkillShare(_ context.Context, cfg *config.Config, u *ui.UI, cmd *cli.Command, name, version, price string) error { + from := strings.TrimSpace(cmd.String("from")) + fromEmbedded := strings.TrimSpace(cmd.String("from-embedded")) + if err := validateSkillSourceFlags(from, fromEmbedded); err != nil { + return err + } + + srcDir := from + skillName := name + if fromEmbedded != "" { + dir, cleanup, err := materializeEmbeddedSkill(fromEmbedded) + if err != nil { + return err + } + defer cleanup() + srcDir = dir + skillName = fromEmbedded + } + if override := strings.TrimSpace(cmd.String("skill-name")); override != "" { + skillName = override + } + if !skillNameRe.MatchString(skillName) || len(skillName) > 64 { + return fmt.Errorf("invalid skill name %q: must match %s (max 64 chars); pass --skill-name to override", skillName, skillNameRe) + } + + if info, err := os.Stat(srcDir); err != nil || !info.IsDir() { + return fmt.Errorf("--from %q is not a readable directory", srcDir) + } + + // Pack deterministically. Pack enforces the post-gzip size cap + // (monetizeapi.MaxSkillBundleBytes) and the SKILL.md requirement. + gz, hash, err := skillpkg.Pack(os.DirFS(srcDir)) + if err != nil { + return err + } + // Warn-only secret scan: the bundle is published verbatim to every + // buyer, so surface anything that smells like a credential. 
+ if warnings, scanErr := skillpkg.ScanSecrets(os.DirFS(srcDir)); scanErr == nil { + for _, w := range warnings { + u.Warnf("bundle content: %s", w) + } + } else { + u.Warnf("bundle secret scan failed (publishing anyway — inspect the bundle yourself): %v", scanErr) + } + + if err := kubectl.EnsureCluster(cfg); err != nil { + return fmt.Errorf("Obol Stack is not running. Start it with `obol stack up` first") + } + + ns := cmd.String("namespace") + + // Crypto payment resolution — same branch as `sell http` (card + // payments are deliberately not offered on sell skill v0). + wallet := strings.TrimSpace(cmd.String("pay-to")) + if wallet == "" { + if resolved, rerr := hermes.ResolveWalletAddress(cfg); rerr == nil { + wallet = resolved + u.Infof("Using wallet from remote-signer: %s", wallet) + } else if u.IsTTY() { + var inputErr error + wallet, inputErr = u.Input("Wallet address (payment recipient)", "") + if inputErr != nil || wallet == "" { + return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") + } + } else { + return fmt.Errorf("recipient required: use --pay-to or set X402_WALLET") + } + } + if err := x402verifier.ValidateWallet(wallet); err != nil { + return err + } + x402verifier.PopulateCABundle(cfg) + + chainName := cmd.String("chain") + assetTerms, err := resolveAssetTerms(cmd, &chainName) + if err != nil { + return err + } + symbol := assetTerms.Symbol + if symbol == "" { + symbol = strings.ToUpper(cmd.String("token")) + } + + // Registration block: same builder as `sell http`, with the skill + // surfaced for discovery plus integrity metadata for ERC-8004. 
+ reg, registerEnabled, err := buildSellRegistrationConfig(name, sellRegistrationInput{ + NoRegister: cmd.Bool("no-register"), + Name: cmd.String("register-name"), + Description: cmd.String("description"), + Skills: []string{skillName}, + }) + if err != nil { + return err + } + if registerEnabled { + reg["metadata"] = map[string]string{ + "skillName": skillName, + "skillVersion": version, + "skillSha256": hash, + } + } else { + reg = nil + } + + bundleCM := buildSkillBundleConfigMapManifest(skillBundleConfigMapName(name), ns, gz) + offer := buildSkillShareOfferManifest(skillShareOfferInputs{ + OfferName: name, + Namespace: ns, + SkillName: skillName, + Version: version, + SHA256: hash, + BundleConfigMap: skillBundleConfigMapName(name), + DisplayName: strings.TrimSpace(cmd.String("display-name")), + Description: strings.TrimSpace(cmd.String("description")), + PayTo: wallet, + Chain: chainName, + Price: price, + MaxTimeout: cmd.Int("max-timeout"), + AssetTerms: assetTerms, + Path: strings.TrimSpace(cmd.String("path")), + Registration: reg, + }) + + if err := preflightOfferPathCollision(cfg, offer); err != nil { + return err + } + + // The bundle ConfigMap MUST go through server-side apply: client- + // side apply copies the whole object (base64 bundle included) into + // the last-applied-configuration annotation, which blows the 256KiB + // annotation cap for any bundle over ~190KB. 
+ if err := applyConfigMapServerSide(cfg, bundleCM); err != nil { + return fmt.Errorf("apply bundle ConfigMap: %w", err) + } + + applyOut, err := kubectlApplyOutput(cfg, offer) + if err != nil { + return fmt.Errorf("apply ServiceOffer: %w", err) + } + if persistErr := persistServiceOffer(cfg, ns, name, skillOfferBundle(ns, name, bundleCM, offer)); persistErr != nil { + u.Warnf("could not persist offer for resume: %v", persistErr) + } + + action := "created" + if strings.Contains(applyOut, "configured") || strings.Contains(applyOut, "unchanged") { + action = "updated" + } + u.Successf("ServiceOffer %s/%s %s (type: skill, %s@%s, %s %s/download → %s)", ns, name, action, skillName, version, price, symbol, wallet) + u.Infof("Bundle: %d bytes gzipped, sha256 %s", len(gz), hash) + u.Infof("The controller will verify the hash → publish the bundle server → payment gate → route") + u.Infof("Check status: obol sell status %s -n %s", name, ns) + + servicePath := strings.TrimSpace(cmd.String("path")) + if servicePath == "" { + servicePath = "/services/" + name + } + baseURL := "http://obol.stack:8080" + if tURL, terr := tunnel.EnsureTunnelForSell(cfg, u); terr != nil { + u.Warnf("Tunnel not started: %v", terr) + u.Dim(" Start manually with: obol tunnel restart") + } else { + baseURL = strings.TrimRight(tURL, "/") + u.Successf("Tunnel: %s%s", baseURL, servicePath) + } + + printSkillPurchaseInstructions(u, baseURL, servicePath, skillName, version, chainName, hash) + + if !cmd.Bool("no-register") { + u.Dim("On-chain identity: obol sell register --chain " + chainName + " (once), then anchor the hash above.") + } + return nil +} + +// printSkillPurchaseInstructions renders the buyer-facing steps plus +// the seller's set-hash hint. Split out so the share flow stays +// readable. +// +// buy.py pay is text-only: it prints diagnostics before the body and +// decodes the body with errors="replace", so redirecting it to a file +// corrupts binary artifacts. 
+// Point it at /skill.json (JSON metadata)
+// and steer the bundle download to a binary-safe x402 client.
+//
+// The two URLs are built by plain concatenation of baseURL, servicePath
+// and a fixed suffix ("/bundle.tar.gz", "/skill.json"); no slash
+// normalization happens here. hash is only used in the seller-facing
+// set-hash hint at the end.
+func printSkillPurchaseInstructions(u *ui.UI, baseURL, servicePath, skillName, version, chain, hash string) {
+	bundleURL := baseURL + servicePath + "/bundle.tar.gz"
+	metadataURL := baseURL + servicePath + "/skill.json"
+	// Buyer-facing section: probe, paid metadata, paid download, verify.
+	u.Blank()
+	u.Bold("Buy it (one paid request = one download):")
+	u.Printf(" Probe pricing: curl -i %s", bundleURL)
+	u.Printf(" Paid metadata: buy.py pay %s", metadataURL)
+	u.Printf(" Paid download: fetch %s with a binary-safe x402 client, save as %s-%s.tar.gz", bundleURL, skillName, version)
+	u.Dim(" (buy.py pay prints the body as text — do NOT redirect it to a file for the bundle)")
+	u.Printf(" Verify bundle: obol skills verify %s-%s.tar.gz --agent-id --skill %s@%s --chain %s",
+		skillName, version, skillName, version, chain)
+	// Seller-facing section: the calldata command that anchors the hash.
+	u.Blank()
+	u.Bold("Anchor the bundle hash on ERC-8004 (sellers — submitted with YOUR wallet):")
+	u.Printf(" obol skills calldata set-hash %s@%s --agent-id --hash %s --chain %s",
+		skillName, version, hash, chain)
+}
+
+// validateSkillSourceFlags enforces the --from XOR --from-embedded
+// contract for SHARE mode. It returns nil when exactly one of the two
+// values is non-empty and a descriptive error when both or neither are
+// set. The values are compared as-is; no trimming is done here.
+func validateSkillSourceFlags(from, fromEmbedded string) error {
+	switch {
+	case from != "" && fromEmbedded != "":
+		return fmt.Errorf("--from and --from-embedded are mutually exclusive — pass exactly one")
+	case from == "" && fromEmbedded == "":
+		return fmt.Errorf("bundle source required: --from or --from-embedded ")
+	default:
+		return nil
+	}
+}
+
+// materializeEmbeddedSkill copies one embedded skill into a temp dir
+// (the same normalization path as agent seeding) and returns the
+// per-skill directory to pack from. Caller must invoke cleanup.
+// On every error path the returned cleanup is nil, so check err before
+// deferring it.
+func materializeEmbeddedSkill(name string) (dir string, cleanup func(), err error) {
+	names, err := embed.GetEmbeddedSkillNames()
+	if err != nil {
+		return "", nil, err
+	}
+	// Reject unknown names up front and list the valid ones in the error.
+	if !slices.Contains(names, name) {
+		return "", nil, fmt.Errorf("embedded skill %q not found; available: %s", name, strings.Join(names, ", "))
+	}
+	tmp, err := os.MkdirTemp("", "obol-sell-skill-*")
+	if err != nil {
+		return "", nil, fmt.Errorf("create temp dir: %w", err)
+	}
+	// Best-effort removal of the whole temp tree; the RemoveAll error is
+	// deliberately ignored.
+	cleanup = func() { _ = os.RemoveAll(tmp) }
+	if err := embed.WriteSkillSubset(tmp, []string{name}); err != nil {
+		// Remove the temp dir ourselves: the caller receives a nil cleanup
+		// on this path.
+		cleanup()
+		return "", nil, err
+	}
+	// The returned dir is the <tmp>/<name> path; cleanup removes tmp itself.
+	return filepath.Join(tmp, name), cleanup, nil
+}
+
+// buildSkillBundleConfigMapManifest renders the operator-owned bundle
+// ConfigMap: binaryData[monetizeapi.SkillBundleKey] = gzipped tarball.
+//
+// The manifest is a plain map (apiVersion v1, kind ConfigMap) named
+// cmName in namespace ns, carrying the managed-by and skill-bundle
+// labels. gz is stored base64-encoded (standard encoding) under the
+// single binaryData key.
+func buildSkillBundleConfigMapManifest(cmName, ns string, gz []byte) map[string]any {
+	return map[string]any{
+		"apiVersion": "v1",
+		"kind":       "ConfigMap",
+		"metadata": map[string]any{
+			"name":      cmName,
+			"namespace": ns,
+			"labels": map[string]any{
+				"app.kubernetes.io/managed-by": "obol-cli",
+				"obol.org/skill-bundle":        "true",
+			},
+		},
+		"binaryData": map[string]any{
+			monetizeapi.SkillBundleKey: base64.StdEncoding.EncodeToString(gz),
+		},
+	}
+}
+
+// skillShareOfferInputs carries everything buildSkillShareOfferManifest
+// needs; a struct so the pure builder stays unit-testable without a
+// cli.Command.
+//
+// DisplayName, Description, Path, AssetTerms and Registration are
+// optional: the builder omits the corresponding manifest keys when they
+// are empty / zero / nil.
+type skillShareOfferInputs struct {
+	OfferName       string
+	Namespace       string
+	SkillName       string
+	Version         string
+	SHA256          string
+	BundleConfigMap string
+	DisplayName     string
+	Description     string
+	PayTo           string
+	Chain           string
+	Price           string
+	MaxTimeout      int
+	AssetTerms      schemas.AssetTerms
+	Path            string
+	Registration    map[string]any // nil omits the block
+}
+
+// buildSkillShareOfferManifest assembles the type=skill ServiceOffer.
+// spec.upstream is pinned to the controller's deterministic bundle-
+// server name so reconcileUpstream and routeRuleFromOffer need zero
+// changes — and so the controller can reject spoofed upstreams (a skill
+// offer may only ever advertise its own bundle server).
+//
+// The result is a plain map for an obol.org/v1alpha1 ServiceOffer.
+// Pricing is always emitted as price.perRequest; payment.asset,
+// skill.displayName, skill.description, spec.path and spec.registration
+// are only present when the matching input is set.
+func buildSkillShareOfferManifest(in skillShareOfferInputs) map[string]any {
+	payment := map[string]any{
+		"scheme":            "exact",
+		"network":           in.Chain,
+		"payTo":             in.PayTo,
+		"maxTimeoutSeconds": in.MaxTimeout,
+		"price": map[string]any{
+			"perRequest": in.Price,
+		},
+	}
+	// Only attach explicit asset terms when the caller supplied some.
+	if !in.AssetTerms.IsZero() {
+		payment["asset"] = in.AssetTerms
+	}
+
+	skill := map[string]any{
+		"name":    in.SkillName,
+		"version": in.Version,
+		// The digest is normalized to lowercase hex before it is written.
+		"sha256":          strings.ToLower(in.SHA256),
+		"bundleConfigMap": in.BundleConfigMap,
+	}
+	if in.DisplayName != "" {
+		skill["displayName"] = in.DisplayName
+	}
+	if in.Description != "" {
+		skill["description"] = in.Description
+	}
+
+	spec := map[string]any{
+		"type":  "skill",
+		"skill": skill,
+		// Upstream is derived from the offer name, never from user input.
+		"upstream": map[string]any{
+			"service":    monetizeapi.SkillBundleWorkloadName(in.OfferName),
+			"namespace":  in.Namespace,
+			"port":       8080,
+			"healthPath": "/skill.json",
+		},
+		"payment": payment,
+	}
+	if in.Path != "" {
+		spec["path"] = in.Path
+	}
+	if in.Registration != nil {
+		spec["registration"] = in.Registration
+	}
+
+	return map[string]any{
+		"apiVersion": "obol.org/v1alpha1",
+		"kind":       "ServiceOffer",
+		"metadata": map[string]any{
+			"name":      in.OfferName,
+			"namespace": in.Namespace,
+		},
+		"spec": spec,
+	}
+}
+
+// skillOfferBundle wraps the bundle ConfigMap + type=skill ServiceOffer
+// in a v1 List for the resume ledger, modeled on agentOfferBundle. The
+// ConfigMap precedes the offer so a replay lands the artifact before
+// the controller reconciles the offer against it. The resume path
+// routes kind=ConfigMap items through server-side apply (see
+// resumeApplyManifest) — replaying the bundle client-side would blow
+// the 256KiB last-applied-configuration annotation cap.
+func skillOfferBundle(offerNs, name string, bundleCM, offer map[string]any) map[string]any {
+	return map[string]any{
+		"apiVersion": "v1",
+		"kind":       "List",
+		// The List's own metadata carries the offer's name/namespace.
+		"metadata": map[string]any{"name": name, "namespace": offerNs},
+		// Order matters: ConfigMap first, then the ServiceOffer.
+		"items": []any{bundleCM, offer},
+	}
+}
+
+// applyConfigMapServerSide applies one ConfigMap manifest with
+// `kubectl apply --server-side --force-conflicts`. Server-side apply
+// keeps the (potentially ~900KB) binaryData payload out of the
+// last-applied-configuration annotation, which client-side apply would
+// overflow at 256KiB.
+//
+// The manifest is JSON-encoded and handed to
+// kubectl.ApplyServerSideForceConflicts with the "obol-cli" argument
+// (presumably the field manager — confirm against the kubectl helper).
+// A marshal failure is wrapped; the helper's error is returned as-is.
+func applyConfigMapServerSide(cfg *config.Config, manifest map[string]any) error {
+	raw, err := json.Marshal(manifest)
+	if err != nil {
+		return fmt.Errorf("marshal ConfigMap manifest: %w", err)
+	}
+	bin, kc := kubectl.Paths(cfg)
+	return kubectl.ApplyServerSideForceConflicts(bin, kc, raw, "obol-cli")
+}
+
+// resumeApplyManifest replays one persisted ledger manifest. Plain
+// manifests keep the legacy client-side apply. v1 List bundles are
+// applied item by item in order, routing kind=ConfigMap items (skill
+// bundle artifacts) through server-side apply — everything else (the
+// namespace shims in agent bundles, the offers themselves) stays
+// client-side.
+func resumeApplyManifest(cfg *config.Config, manifest map[string]any) error {
+	if manifest["kind"] != "List" {
+		return kubectlApply(cfg, manifest)
+	}
+	// A List whose items field is missing or not a list must fail loudly:
+	// silently treating it as empty would report a successful resume
+	// without applying anything.
+	items, ok := manifest["items"].([]any)
+	if !ok {
+		return fmt.Errorf("malformed List items %T in persisted offer bundle", manifest["items"])
+	}
+	for _, it := range items {
+		m, ok := it.(map[string]any)
+		if !ok {
+			return fmt.Errorf("malformed List item %T in persisted offer bundle", it)
+		}
+		// ConfigMaps go through server-side apply; everything else keeps
+		// the client-side path. The first failure aborts the replay.
+		if m["kind"] == "ConfigMap" {
+			if err := applyConfigMapServerSide(cfg, m); err != nil {
+				return err
+			}
+			continue
+		}
+		if err := kubectlApply(cfg, m); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/cmd/obol/sell_skill_test.go b/cmd/obol/sell_skill_test.go
new file mode 100644
index 00000000..10836c5a
--- /dev/null
+++ b/cmd/obol/sell_skill_test.go
@@ -0,0 +1,330 @@
+package main
+
+import (
+	"bytes"
+	"encoding/base64"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/ObolNetwork/obol-stack/internal/embed"
+	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
+	"github.com/ObolNetwork/obol-stack/internal/schemas"
+	"github.com/ObolNetwork/obol-stack/internal/skillpkg"
+	"github.com/ObolNetwork/obol-stack/internal/ui"
+	"github.com/urfave/cli/v3"
+)
+
+// TestSellCommand_IncludesSkillSubcommand checks that `sell` exposes a
+// `skill` subcommand and pins its ArgsUsage string.
+func TestSellCommand_IncludesSkillSubcommand(t *testing.T) {
+	cfg := newTestConfig(t)
+	if c := findSubcommand(t, sellCommand(cfg), "skill"); c.ArgsUsage != "" {
+		t.Errorf("sell skill ArgsUsage = %q, want ", c.ArgsUsage)
+	}
+}
+
+// TestSellSkill_Flags pins the flag surface of `sell skill`: which flags
+// must exist, which must not, and their defaults/aliases.
+func TestSellSkill_Flags(t *testing.T) {
+	cfg := newTestConfig(t)
+	skill := findSubcommand(t, sellCommand(cfg), "skill")
+	flags := flagMap(skill)
+
+	requireFlags(t, flags,
+		"from", "from-embedded", "skill-name", "skill-version",
+		"display-name", "description",
+		"pay-to", "chain", "token", "price", "per-request",
+		"path", "max-timeout", "namespace",
+		"no-register", "register-name",
+	)
+
+	// Selling a skill's execution is `obol sell agent`, not a flag here:
+	// the as-service sugar was removed as redundant.
+	for _, name := range []string{"as-service", "agent"} {
+		if _, ok := flags[name]; ok {
+			t.Errorf("flag --%s must not exist on sell skill (sell a skill's execution via `obol sell agent`)", name)
+		}
+	}
+
+	// Payment flag set mirrors sell http.
+	assertStringDefault(t, flags, "chain", "base")
+	assertStringDefault(t, flags, "token", "USDC")
+	assertStringDefault(t, flags, "namespace", "default")
+	assertIntDefault(t, flags, "max-timeout", 300)
+	assertFlagHasAlias(t, flags, "pay-to", "wallet")
+	assertFlagHasAlias(t, flags, "namespace", "n")
+
+	assertFlagRequired(t, flags, "skill-version")
+
+	// Skills are per-request only in v0 — no per-mtok/per-hour.
+	for _, name := range []string{"per-mtok", "per-hour"} {
+		if _, ok := flags[name]; ok {
+			t.Errorf("flag --%s must not exist on sell skill (per-request pricing only)", name)
+		}
+	}
+}
+
+// TestValidateSkillSourceFlags covers all four from/from-embedded
+// combinations; an empty wantErr means the call must succeed.
+func TestValidateSkillSourceFlags(t *testing.T) {
+	tests := []struct {
+		name         string
+		from         string
+		fromEmbedded string
+		wantErr      string
+	}{
+		{name: "from only", from: "./skills/x", fromEmbedded: ""},
+		{name: "embedded only", from: "", fromEmbedded: "buy-x402"},
+		{name: "both", from: "./skills/x", fromEmbedded: "buy-x402", wantErr: "mutually exclusive"},
+		{name: "neither", from: "", fromEmbedded: "", wantErr: "bundle source required"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := validateSkillSourceFlags(tt.from, tt.fromEmbedded)
+			if tt.wantErr == "" {
+				if err != nil {
+					t.Fatalf("unexpected error: %v", err)
+				}
+				return
+			}
+			if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
+				t.Fatalf("err = %v, want substring %q", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// TestMaterializeEmbeddedSkill_PacksDeterministically exercises the
+// --from-embedded path end to end: materialize a real embedded skill
+// twice and prove the two packs hash identically (both source modes
+// share one normalization).
+func TestMaterializeEmbeddedSkill_PacksDeterministically(t *testing.T) { + names, err := embed.GetEmbeddedSkillNames() + if err != nil || len(names) == 0 { + t.Fatalf("no embedded skills available: %v", err) + } + name := names[0] + + pack := func() string { + dir, cleanup, err := materializeEmbeddedSkill(name) + if err != nil { + t.Fatal(err) + } + defer cleanup() + if _, err := os.Stat(filepath.Join(dir, "SKILL.md")); err != nil { + t.Fatalf("materialized skill %s missing SKILL.md: %v", name, err) + } + _, hash, err := skillpkg.Pack(os.DirFS(dir)) + if err != nil { + t.Fatal(err) + } + return hash + } + + if h1, h2 := pack(), pack(); h1 != h2 { + t.Errorf("two materializations hash differently: %s vs %s", h1, h2) + } +} + +func TestMaterializeEmbeddedSkill_UnknownName(t *testing.T) { + _, _, err := materializeEmbeddedSkill("definitely-not-a-skill") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Fatalf("err = %v, want not-found listing available skills", err) + } +} + +func TestSkillBundleConfigMapName(t *testing.T) { + if got := skillBundleConfigMapName("quant-notes"); got != "quant-notes-skill-bundle" { + t.Fatalf("skillBundleConfigMapName = %q, want quant-notes-skill-bundle", got) + } +} + +func TestBuildSkillBundleConfigMapManifest(t *testing.T) { + gz := []byte{0x1f, 0x8b, 0x08, 0x00} + m := buildSkillBundleConfigMapManifest("quant-skill-bundle", "default", gz) + + if m["kind"] != "ConfigMap" || m["apiVersion"] != "v1" { + t.Fatalf("unexpected kind/apiVersion: %v/%v", m["kind"], m["apiVersion"]) + } + md := m["metadata"].(map[string]any) + if md["name"] != "quant-skill-bundle" || md["namespace"] != "default" { + t.Errorf("metadata = %v", md) + } + bd := m["binaryData"].(map[string]any) + enc, ok := bd[monetizeapi.SkillBundleKey].(string) + if !ok { + t.Fatalf("binaryData missing key %q", monetizeapi.SkillBundleKey) + } + dec, err := base64.StdEncoding.DecodeString(enc) + if err != nil || string(dec) != string(gz) { + 
t.Errorf("binaryData does not base64 round-trip: %v", err) + } +} + +func TestBuildSkillShareOfferManifest(t *testing.T) { + hash := strings.Repeat("AB", 32) // uppercase in, lowercase out + in := skillShareOfferInputs{ + OfferName: "quant-notes", + Namespace: "default", + SkillName: "quant-notes", + Version: "0.1.0", + SHA256: hash, + BundleConfigMap: "quant-notes-skill-bundle", + DisplayName: "Quant Notes", + Description: "daily quant notes skill", + PayTo: "0x1111111111111111111111111111111111111111", + Chain: "base-sepolia", + Price: "0.25", + MaxTimeout: 300, + Registration: map[string]any{ + "enabled": true, + "skills": []string{"quant-notes"}, + "metadata": map[string]string{ + "skillName": "quant-notes", + "skillVersion": "0.1.0", + "skillSha256": strings.ToLower(hash), + }, + }, + } + m := buildSkillShareOfferManifest(in) + + spec := m["spec"].(map[string]any) + if spec["type"] != "skill" { + t.Fatalf("spec.type = %v, want skill", spec["type"]) + } + + skill := spec["skill"].(map[string]any) + if skill["name"] != "quant-notes" || skill["version"] != "0.1.0" { + t.Errorf("skill identity = %v", skill) + } + if skill["sha256"] != strings.ToLower(hash) { + t.Errorf("sha256 = %v, want lowercase %s (CRD pattern is lowercase-only)", skill["sha256"], strings.ToLower(hash)) + } + if skill["bundleConfigMap"] != "quant-notes-skill-bundle" { + t.Errorf("bundleConfigMap = %v", skill["bundleConfigMap"]) + } + if skill["displayName"] != "Quant Notes" || skill["description"] != "daily quant notes skill" { + t.Errorf("display fields = %v", skill) + } + + // Upstream is pinned to the controller's deterministic bundle-server + // name — the anti-spoof invariant the controller enforces. 
+ up := spec["upstream"].(map[string]any) + if up["service"] != monetizeapi.SkillBundleWorkloadName("quant-notes") { + t.Errorf("upstream.service = %v, want %s", up["service"], monetizeapi.SkillBundleWorkloadName("quant-notes")) + } + if up["namespace"] != "default" || up["port"] != 8080 || up["healthPath"] != "/skill.json" { + t.Errorf("upstream = %v", up) + } + + pay := spec["payment"].(map[string]any) + if pay["network"] != "base-sepolia" || pay["payTo"] != in.PayTo { + t.Errorf("payment = %v", pay) + } + if price := pay["price"].(map[string]any); price["perRequest"] != "0.25" { + t.Errorf("price = %v", price) + } + if _, hasPath := spec["path"]; hasPath { + t.Error("spec.path must be omitted when unset") + } + if _, hasReg := spec["registration"]; !hasReg { + t.Error("spec.registration missing") + } + + // No-registration variant omits the block entirely. + in.Registration = nil + in.Path = "/services/custom" + m2 := buildSkillShareOfferManifest(in) + spec2 := m2["spec"].(map[string]any) + if _, hasReg := spec2["registration"]; hasReg { + t.Error("spec.registration must be omitted when nil") + } + if spec2["path"] != "/services/custom" { + t.Errorf("spec.path = %v", spec2["path"]) + } +} + +func TestBuildSkillShareOfferManifest_AssetTerms(t *testing.T) { + in := skillShareOfferInputs{ + OfferName: "x", Namespace: "default", SkillName: "x", Version: "1", + SHA256: strings.Repeat("a", 64), BundleConfigMap: "x-skill-bundle", + PayTo: "0x1111111111111111111111111111111111111111", Chain: "ethereum", + Price: "10", MaxTimeout: 300, + AssetTerms: schemas.AssetTerms{Address: "0xdead", Symbol: "OBOL", Decimals: 18}, + } + spec := buildSkillShareOfferManifest(in)["spec"].(map[string]any) + if _, ok := spec["payment"].(map[string]any)["asset"]; !ok { + t.Error("payment.asset missing for non-default token") + } +} + +func TestSkillOfferBundle_ShapeAndType(t *testing.T) { + cm := buildSkillBundleConfigMapManifest("x-skill-bundle", "default", []byte("gz")) + offer := 
buildSkillShareOfferManifest(skillShareOfferInputs{ + OfferName: "x", Namespace: "default", SkillName: "x", Version: "1", + SHA256: strings.Repeat("a", 64), BundleConfigMap: "x-skill-bundle", + PayTo: "0x1111111111111111111111111111111111111111", Chain: "base", + Price: "0.1", MaxTimeout: 300, + }) + bundle := skillOfferBundle("default", "x", cm, offer) + + if bundle["kind"] != "List" { + t.Fatalf("bundle kind = %v, want List", bundle["kind"]) + } + items := bundle["items"].([]any) + if len(items) != 2 { + t.Fatalf("items = %d, want 2", len(items)) + } + if items[0].(map[string]any)["kind"] != "ConfigMap" { + t.Error("first item must be the bundle ConfigMap (replayed before the offer)") + } + if items[1].(map[string]any)["kind"] != "ServiceOffer" { + t.Error("second item must be the ServiceOffer") + } + + // The resume ledger reports the inner offer's type for List bundles. + if got := manifestOfferType(bundle); got != "skill" { + t.Errorf("manifestOfferType = %q, want skill", got) + } + if ns, name := manifestNSName(bundle); ns != "default" || name != "x" { + t.Errorf("manifestNSName = (%q, %q)", ns, name) + } +} + +// TestSellSkill_RequiredFlagEnforced runs the command without +// --skill-version and expects urfave/cli's required-flag error before +// the action runs (no cluster involved). +func TestSellSkill_RequiredFlagEnforced(t *testing.T) { + cfg := newTestConfig(t) + root := &cli.Command{Commands: []*cli.Command{sellCommand(cfg)}} + err := root.Run(t.Context(), []string{"obol", "sell", "skill", "x", "--from", t.TempDir()}) + if err == nil || !strings.Contains(err.Error(), "skill-version") { + t.Fatalf("err = %v, want required-flag error naming skill-version", err) + } +} + +// TestPrintSkillPurchaseInstructions_BinarySafe pins the buyer-facing +// copy: buy.py pay is text-only (diagnostics before the body, lossy +// decode), so the printed instructions must point it at /skill.json and +// must never tell buyers to redirect it into the bundle file. 
+func TestPrintSkillPurchaseInstructions_BinarySafe(t *testing.T) { + var out, errOut bytes.Buffer + u := ui.NewForTest(&out, &errOut) + + printSkillPurchaseInstructions(u, "https://x.example.com", "/services/gas-skill", + "gas", "0.1.0", "base-sepolia", strings.Repeat("a", 64)) + got := out.String() + errOut.String() + + if strings.Contains(got, "buy.py pay https://x.example.com/services/gas-skill/bundle.tar.gz") { + t.Error("instructions must not run buy.py pay against bundle.tar.gz (text-only, corrupts gzip bytes)") + } + if strings.Contains(got, "> gas-0.1.0.tar.gz") { + t.Error("instructions must not redirect buy.py pay stdout into the bundle file") + } + for _, want := range []string{ + "buy.py pay https://x.example.com/services/gas-skill/skill.json", + "binary-safe x402 client", + "obol skills verify gas-0.1.0.tar.gz --agent-id --skill gas@0.1.0 --chain base-sepolia", + } { + if !strings.Contains(got, want) { + t.Errorf("instructions missing %q\noutput:\n%s", want, got) + } + } +} diff --git a/cmd/obol/sell_test.go b/cmd/obol/sell_test.go index d363a232..1e5fecac 100644 --- a/cmd/obol/sell_test.go +++ b/cmd/obol/sell_test.go @@ -285,16 +285,119 @@ func TestSellHTTP_Flags(t *testing.T) { "namespace", "upstream", "port", "health-path", "path", "max-timeout", "register", "no-register", "register-name", "register-description", "register-image", + "pay-with", "stripe-account", "card-currency", "stripe-network-id", ) assertStringDefault(t, flags, "chain", "base") assertStringDefault(t, flags, "token", "USDC") assertStringDefault(t, flags, "namespace", "default") assertStringDefault(t, flags, "health-path", "/health") + assertStringDefault(t, flags, "pay-with", "crypto") + assertStringDefault(t, flags, "card-currency", "usd") assertIntDefault(t, flags, "port", 8080) assertIntDefault(t, flags, "max-timeout", 300) } +func TestNormalizePayWith(t *testing.T) { + cases := map[string]string{ + "": payMethodCrypto, + " ": payMethodCrypto, + "crypto": payMethodCrypto, + 
"CRYPTO": payMethodCrypto, + "card": payMethodCard, + " Card ": payMethodCard, + "unknown": "unknown", // passthrough; caller rejects + } + for in, want := range cases { + if got := normalizePayWith(in); got != want { + t.Errorf("normalizePayWith(%q) = %q, want %q", in, got, want) + } + } +} + +// runCardResolve builds a minimal cli.Command carrying the card flags, +// parses args, and returns resolveCardPayment's result. +func runCardResolve(t *testing.T, args ...string) (map[string]any, error) { + t.Helper() + var ( + out map[string]any + rerr error + ) + cmd := &cli.Command{ + Name: "http", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "pay-with", Value: payMethodCard}, + &cli.StringFlag{Name: "stripe-account"}, + &cli.StringFlag{Name: "card-currency", Value: "usd"}, + &cli.StringFlag{Name: "stripe-network-id"}, + &cli.IntFlag{Name: "max-timeout", Value: 300}, + }, + Action: func(_ context.Context, c *cli.Command) error { + out, rerr = resolveCardPayment(c, map[string]any{"perRequest": "0.01"}) + return nil + }, + } + if err := cmd.Run(context.Background(), append([]string{"http"}, args...)); err != nil { + t.Fatalf("cmd.Run: %v", err) + } + return out, rerr +} + +func TestResolveCardPayment_Valid(t *testing.T) { + out, err := runCardResolve(t, "--stripe-account", "acct_1A2b3C4d", "--card-currency", "eur", "--stripe-network-id", "stripenet_test") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if out["method"] != payMethodCard { + t.Errorf("method = %v, want card", out["method"]) + } + card, ok := out["card"].(map[string]any) + if !ok { + t.Fatalf("card block missing/not a map: %v", out["card"]) + } + if card["account"] != "acct_1A2b3C4d" { + t.Errorf("card.account = %v, want acct_1A2b3C4d", card["account"]) + } + if card["provider"] != "stripe" { + t.Errorf("card.provider = %v, want stripe", card["provider"]) + } + if card["currency"] != "eur" { + t.Errorf("card.currency = %v, want eur", card["currency"]) + } + if card["networkId"] != 
"stripenet_test" { + t.Errorf("card.networkId = %v, want stripenet_test", card["networkId"]) + } + if _, ok := out["price"].(map[string]any); !ok { + t.Errorf("price block missing: %v", out["price"]) + } + // payTo / network must NOT leak into a card payment. + if _, ok := out["payTo"]; ok { + t.Error("card payment must not contain payTo") + } + if _, ok := out["network"]; ok { + t.Error("card payment must not contain network") + } +} + +func TestResolveCardPayment_Invalid(t *testing.T) { + cases := []struct { + name string + args []string + }{ + {"missing account", []string{"--card-currency", "usd"}}, + {"bad account prefix", []string{"--stripe-account", "0xdeadbeef"}}, + {"bad currency", []string{"--stripe-account", "acct_x1", "--card-currency", "US"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := runCardResolve(t, tc.args...) + if err == nil { + t.Fatalf("expected error for %s", tc.name) + } + }) + } +} + func TestBuildSellRegistrationConfig_DefaultEnabled(t *testing.T) { reg, enabled, err := buildSellRegistrationConfig("demo", sellRegistrationInput{}) if err != nil { diff --git a/cmd/obol/skills.go b/cmd/obol/skills.go new file mode 100644 index 00000000..a82f21e1 --- /dev/null +++ b/cmd/obol/skills.go @@ -0,0 +1,442 @@ +package main + +// obol skills — skill-marketplace utilities on top of ERC-8004: +// anchoring a bundle's sha256 on the Identity Registry, rating skills +// via the Reputation Registry (ERC-8239 draft tag convention, obol +// interim form), reading aggregate reputation, and verifying a +// downloaded bundle against the on-chain hash. +// +// Calldata-printer pattern throughout: the CLI prints to+data, the +// OPERATOR (or buyer) submits with their own wallet. obol NEVER signs. +// +// Distinct from `obol openclaw skills`, which manages skill files on an +// OpenClaw instance's PVC. 
+ +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "math/big" + "os" + "regexp" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/stack" + "github.com/ethereum/go-ethereum/common" + "github.com/urfave/cli/v3" +) + +func skillsCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "skills", + Usage: "Skill marketplace: anchor bundle hashes, rate skills, read reputation, verify downloads (ERC-8004)", + Commands: []*cli.Command{ + skillsCalldataCommand(cfg), + skillsReputationCommand(cfg), + skillsVerifyCommand(cfg), + }, + } +} + +func skillsCalldataCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "calldata", + Usage: "Print ERC-8004 calldata for skill operations (submitted with YOUR wallet — obol NEVER signs)", + Commands: []*cli.Command{ + skillsCalldataSetHashCommand(cfg), + skillsCalldataFeedbackCommand(cfg), + }, + } +} + +// skillsCalldataSetHashCommand prints IdentityRegistry.setMetadata +// calldata anchoring a skill bundle's sha256 under the key +// "skill.sha256:@". +func skillsCalldataSetHashCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "set-hash", + Usage: "Print IdentityRegistry setMetadata calldata anchoring a skill bundle's sha256", + ArgsUsage: "@", + Description: `Anchors the bundle hash on the seller's ERC-8004 agent so buyers can +verify a paid download against the chain (obol skills verify). + +The hash comes from --hash (printed by ` + "`obol sell skill`" + `) or is +computed from a local bundle with --from-bundle. The metadata value is +stored as the 64-char ASCII lowercase hex string. 
+ +Example: + obol skills calldata set-hash quant-notes@0.1.0 --agent-id 42 --hash --chain base`, + Flags: []cli.Flag{ + &cli.Int64Flag{Name: "agent-id", Usage: "[REQUIRED] Your ERC-8004 agent id (Identity Registry tokenId)", Required: true}, + &cli.StringFlag{Name: "chain", Usage: "Registration chain (base, base-sepolia, ethereum)", Value: "base"}, + &cli.StringFlag{Name: "skill", Usage: "Skill ref @ (alternative to the positional argument)"}, + &cli.StringFlag{Name: "hash", Usage: "Bundle sha256 as 64 hex chars (with or without 0x prefix)"}, + &cli.StringFlag{Name: "from-bundle", Aliases: []string{"bundle"}, Usage: "Path to a bundle.tar.gz to hash instead of --hash"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + ref, err := skillRefFromCmd(cmd) + if err != nil { + return err + } + + hashArg := strings.TrimSpace(cmd.String("hash")) + bundlePath := strings.TrimSpace(cmd.String("from-bundle")) + var hexHash string + switch { + case hashArg != "" && bundlePath != "": + return fmt.Errorf("--hash and --from-bundle are mutually exclusive — pass exactly one") + case hashArg != "": + hexHash, err = parseSkillHashArg(hashArg) + if err != nil { + return err + } + case bundlePath != "": + hexHash, err = sha256File(bundlePath) + if err != nil { + return err + } + default: + return fmt.Errorf("hash source required: --hash 0x or --from-bundle ") + } + + net, err := erc8004.ResolveNetwork(cmd.String("chain")) + if err != nil { + return err + } + key := erc8004.SkillHashMetadataKey(ref) + calldata, err := erc8004.EncodeSetMetadata(big.NewInt(cmd.Int64("agent-id")), key, []byte(hexHash)) + if err != nil { + return err + } + + fmt.Printf("Skill: %s\n", ref) + fmt.Printf("Metadata key: %s\n", key) + fmt.Printf("Metadata value: %s (ASCII hex sha256)\n", hexHash) + fmt.Printf("IdentityRegistry (%s): %s\n", net.Name, net.RegistryAddress) + fmt.Printf("Calldata: 0x%x\n", calldata) + fmt.Println("Submit with YOUR wallet (the agent owner; e.g. 
the agent remote-signer or cast send) — the controller NEVER signs.") + fmt.Println("Note: re-submitting an unchanged value reverts on-chain (the registry rejects no-op writes).") + return nil + }, + } +} + +// skillsCalldataFeedbackCommand prints ReputationRegistry.giveFeedback +// calldata rating one skill of one agent, tagged with the ERC-8239 +// draft convention (tag1 "asr:skill", obol interim tag2). +func skillsCalldataFeedbackCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "feedback", + Usage: "Print ReputationRegistry giveFeedback calldata rating a skill (buyer-submitted)", + ArgsUsage: "@", + Description: `Rates one skill of one seller agent with a 0-100 score. The rating is +tagged tag1="asr:skill" and tag2 in the documented obol interim form of +the ERC-8239 draft, so per-skill reputation aggregates cleanly. + +Example: + obol skills calldata feedback quant-notes@0.1.0 --agent-id 42 --value 95 --chain base`, + Flags: []cli.Flag{ + &cli.Int64Flag{Name: "agent-id", Usage: "[REQUIRED] The SELLER's ERC-8004 agent id (Identity Registry tokenId)", Required: true}, + &cli.IntFlag{Name: "value", Usage: "[REQUIRED] Score 0-100", Required: true}, + &cli.StringFlag{Name: "chain", Usage: "Chain hosting the registries (base, base-sepolia, ethereum)", Value: "base"}, + &cli.StringFlag{Name: "skill", Usage: "Skill ref @ (alternative to the positional argument)"}, + &cli.StringFlag{Name: "endpoint", Usage: "Optional endpoint the rating refers to (e.g. the offer URL)"}, + &cli.StringFlag{Name: "feedback-uri", Aliases: []string{"uri"}, Usage: "Optional URI of an off-chain document backing the rating"}, + &cli.StringFlag{Name: "feedback-hash", Aliases: []string{"hash"}, Usage: "Optional 32-byte hash (0x...) 
of the feedback document"}, + }, + Action: func(ctx context.Context, cmd *cli.Command) error { + ref, err := skillRefFromCmd(cmd) + if err != nil { + return err + } + value := cmd.Int("value") + if value < 0 || value > 100 { + return fmt.Errorf("--value must be 0-100, got %d", value) + } + + net, err := erc8004.ResolveNetwork(cmd.String("chain")) + if err != nil { + return err + } + registry, err := erc8004.ReputationRegistryAddress(cmd.String("chain")) + if err != nil { + return err + } + agentID := big.NewInt(cmd.Int64("agent-id")) + tag2, err := erc8004.SkillTag2(net, agentID, ref) + if err != nil { + return err + } + + fbHash := common.Hash{} + if h := strings.TrimSpace(cmd.String("feedback-hash")); h != "" { + raw, err := hex.DecodeString(strings.TrimPrefix(strings.ToLower(h), "0x")) + if err != nil || len(raw) != 32 { + return fmt.Errorf("--feedback-hash must be 32 bytes of hex (0x + 64 chars), got %q", h) + } + fbHash = common.BytesToHash(raw) + } + + calldata, err := erc8004.EncodeGiveFeedback( + agentID, + big.NewInt(int64(value)), + 0, // score is already 0-100, no fixed-point scaling + erc8004.SkillTag1, + tag2, + strings.TrimSpace(cmd.String("endpoint")), + strings.TrimSpace(cmd.String("feedback-uri")), + fbHash, + ) + if err != nil { + return err + } + + fmt.Printf("Feedback: skill %s on agent %s, score %d/100\n", ref, agentID, value) + fmt.Printf("tag1: %s\n", erc8004.SkillTag1) + fmt.Printf("tag2: %s\n", tag2) + fmt.Printf("ReputationRegistry (%s): %s\n", net.Name, registry) + fmt.Printf("Calldata: 0x%x\n", calldata) + fmt.Println("Submit with YOUR wallet (the buyer's) — self-feedback from the agent owner reverts on-chain; the controller NEVER signs.") + return nil + }, + } +} + +// skillsReputationCommand reads the aggregate per-skill rating via +// getSummary, filtered to the skill's tag pair. 
+func skillsReputationCommand(cfg *config.Config) *cli.Command {
+	return &cli.Command{
+		Name:      "reputation",
+		Usage:     "Read a skill's aggregate on-chain rating (ReputationRegistry getSummary)",
+		ArgsUsage: "@",
+		Flags: []cli.Flag{
+			&cli.Int64Flag{Name: "agent-id", Usage: "[REQUIRED] The seller's ERC-8004 agent id", Required: true},
+			&cli.StringFlag{Name: "chain", Usage: "Chain hosting the registries (base, base-sepolia, ethereum)", Value: "base"},
+			&cli.StringFlag{Name: "skill", Usage: "Skill ref @ (alternative to the positional argument)"},
+			&cli.StringSliceFlag{Name: "raters", Usage: "Optional whitelist of rater addresses (0x..., repeatable); empty = all raters"},
+		},
+		Action: func(ctx context.Context, cmd *cli.Command) error {
+			u := getUI(cmd)
+			// Resolve every input (skill ref, network, registry address,
+			// rater whitelist, tag2) before opening any connection, so
+			// bad arguments fail without touching the RPC endpoint.
+			ref, err := skillRefFromCmd(cmd)
+			if err != nil {
+				return err
+			}
+			net, err := erc8004.ResolveNetwork(cmd.String("chain"))
+			if err != nil {
+				return err
+			}
+			registry, err := erc8004.ReputationRegistryAddress(cmd.String("chain"))
+			if err != nil {
+				return err
+			}
+			raters, err := parseRaterAddresses(cmd.StringSlice("raters"))
+			if err != nil {
+				return err
+			}
+			agentID := big.NewInt(cmd.Int64("agent-id"))
+			// tag2 is derived from network + agent id + skill ref; together
+			// with SkillTag1 it is the filter passed to Summary below.
+			tag2, err := erc8004.SkillTag2(net, agentID, ref)
+			if err != nil {
+				return err
+			}
+
+			// Read-only eRPC-backed client; no signer anywhere near this path.
+			client, err := erc8004.NewClientForNetwork(ctx, stack.LocalIngressURL(cfg)+"/rpc", net)
+			if err != nil {
+				return fmt.Errorf("connect to %s via eRPC: %w", net.Name, err)
+			}
+			defer client.Close()
+
+			reader, err := erc8004.NewReputationReader(client.ETH(), registry)
+			if err != nil {
+				return err
+			}
+			summary, err := reader.Summary(ctx, agentID, raters, erc8004.SkillTag1, tag2)
+			if err != nil {
+				return err
+			}
+
+			// The same formatted score string feeds both the JSON and the
+			// human-readable output.
+			score := skillScoreString(summary.SummaryValue, summary.SummaryValueDecimals)
+			if u.IsJSON() {
+				return u.JSON(struct {
+					AgentID  int64  `json:"agentId"`
+					Skill    string `json:"skill"`
+					Network  string `json:"network"`
+					Registry string `json:"registry"`
+					Tag1     string `json:"tag1"`
+					Tag2     string `json:"tag2"`
+					Count    uint64 `json:"count"`
+					Score    string `json:"score"`
+				}{
+					AgentID:  cmd.Int64("agent-id"),
+					Skill:    ref,
+					Network:  net.Name,
+					Registry: registry,
+					Tag1:     erc8004.SkillTag1,
+					Tag2:     tag2,
+					Count:    summary.Count,
+					Score:    score,
+				})
+			}
+
+			u.Printf("Skill: %s (agent %s on %s)", ref, agentID, net.Name)
+			u.Printf("tag2: %s", tag2)
+			u.Printf("Ratings: %d", summary.Count)
+			u.Printf("Score: %s / 100", score)
+			// Only mention the whitelist when one was actually supplied.
+			if len(raters) > 0 {
+				u.Printf("Raters: %d whitelisted", len(raters))
+			}
+			return nil
+		},
+	}
+}
+
+// skillsVerifyCommand checks a downloaded bundle against the seller's
+// on-chain hash anchor. Exit code is non-zero on mismatch or when no
+// anchor exists, so scripts can gate installs on it.
+func skillsVerifyCommand(cfg *config.Config) *cli.Command {
+	return &cli.Command{
+		Name:      "verify",
+		Usage:     "Verify a downloaded skill bundle against the seller's on-chain sha256 anchor",
+		ArgsUsage: "<bundle>",
+		Flags: []cli.Flag{
+			&cli.Int64Flag{Name: "agent-id", Usage: "[REQUIRED] The seller's ERC-8004 agent id", Required: true},
+			&cli.StringFlag{Name: "skill", Usage: "[REQUIRED] Skill ref <name>@<version>", Required: true},
+			&cli.StringFlag{Name: "chain", Usage: "Chain hosting the Identity Registry (base, base-sepolia, ethereum)", Value: "base"},
+		},
+		Action: func(ctx context.Context, cmd *cli.Command) error {
+			u := getUI(cmd)
+			if cmd.NArg() != 1 {
+				return fmt.Errorf("bundle path required: obol skills verify <bundle> --agent-id N --skill <name>@<version>")
+			}
+			bundlePath := cmd.Args().First()
+
+			// Trim before validating so the ref that is checked is exactly
+			// the ref that is used below (same order as skillRefFromCmd).
+			ref := strings.TrimSpace(cmd.String("skill"))
+			if _, _, err := erc8004.ParseSkillRef(ref); err != nil {
+				return err
+			}
+
+			// Hash locally first: a missing or unreadable bundle fails
+			// before any network round-trip.
+			localHash, err := sha256File(bundlePath)
+			if err != nil {
+				return err
+			}
+
+			net, err := erc8004.ResolveNetwork(cmd.String("chain"))
+			if err != nil {
+				return err
+			}
+			client, err := erc8004.NewClientForNetwork(ctx, stack.LocalIngressURL(cfg)+"/rpc", net)
+			if err != nil {
+				return fmt.Errorf("connect to %s via eRPC: %w", net.Name, err)
+			}
+			defer client.Close()
+
+			key := erc8004.SkillHashMetadataKey(ref)
+			agentID := big.NewInt(cmd.Int64("agent-id"))
+			onChain, err := client.GetMetadata(ctx, agentID, key)
+			if err != nil {
+				return fmt.Errorf("read on-chain metadata %q for agent %s on %s: %w", key, agentID, net.Name, err)
+			}
+			// An absent anchor is a failure, not a pass: there is nothing
+			// to verify against.
+			if len(onChain) == 0 {
+				return fmt.Errorf("FAIL: no on-chain hash anchored for %s (agent %s, key %q, %s) — ask the seller to run `obol skills calldata set-hash`",
+					ref, agentID, key, net.Name)
+			}
+
+			if !skillHashMatches(onChain, localHash) {
+				u.Errorf("MISMATCH — do not trust this bundle")
+				u.Printf(" local sha256: %s", localHash)
+				u.Printf(" on-chain anchor: %s", strings.TrimSpace(string(onChain)))
+				return fmt.Errorf("bundle %s does not match the on-chain hash for %s (agent %s, %s)", bundlePath, ref, agentID, net.Name)
+			}
+
+			u.Successf("OK — bundle matches the on-chain anchor")
+			u.Printf(" skill: %s (agent %s on %s)", ref, agentID, net.Name)
+			u.Printf(" sha256: %s", localHash)
+			return nil
+		},
+	}
+}
+
+// ── pure helpers (unit-tested without a live chain) ─────────────────────────
+
+// skillRefFromCmd resolves the <name>@<version> ref from the positional
+// argument, falling back to --skill when no positional is given, and
+// validates it with erc8004.ParseSkillRef. The returned ref is trimmed.
+func skillRefFromCmd(cmd *cli.Command) (string, error) {
+	ref := strings.TrimSpace(cmd.Args().First())
+	if ref == "" {
+		ref = strings.TrimSpace(cmd.String("skill"))
+	}
+	if ref == "" {
+		return "", fmt.Errorf("skill ref required: pass <name>@<version> as the argument or via --skill")
+	}
+	if _, _, err := erc8004.ParseSkillRef(ref); err != nil {
+		return "", err
+	}
+	return ref, nil
+}
+
+// skillHashRe matches a normalized sha256: exactly 64 lowercase hex chars.
+var skillHashRe = regexp.MustCompile(`^[a-f0-9]{64}$`)
+
+// parseSkillHashArg normalizes an operator-supplied sha256: trims, drops
+// an optional 0x prefix, lowercases, and validates 64 hex chars. The error
+// quotes the input exactly as supplied.
+func parseSkillHashArg(s string) (string, error) {
+	h := strings.ToLower(strings.TrimSpace(s))
+	h = strings.TrimPrefix(h, "0x")
+	if !skillHashRe.MatchString(h) {
+		return "", fmt.Errorf("invalid sha256 %q: want 64 hex chars (optionally 0x-prefixed)", s)
+	}
+	return h, nil
+}
+
+// sha256File hashes a file's bytes to lowercase hex. The whole file is read
+// into memory via os.ReadFile before hashing.
+func sha256File(path string) (string, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return "", fmt.Errorf("read %s: %w", path, err)
+	}
+	sum := sha256.Sum256(data)
+	return hex.EncodeToString(sum[:]), nil
+}
+
+// parseRaterAddresses validates and converts --raters values.
+func parseRaterAddresses(raw []string) ([]common.Address, error) {
+	var out []common.Address
+	for _, r := range raw {
+		r = strings.TrimSpace(r)
+		// Blank entries are skipped rather than rejected.
+		if r == "" {
+			continue
+		}
+		if !common.IsHexAddress(r) {
+			return nil, fmt.Errorf("invalid rater address %q", r)
+		}
+		out = append(out, common.HexToAddress(r))
+	}
+	return out, nil
+}
+
+// skillHashMatches compares the on-chain metadata value (ASCII hex,
+// possibly 0x-prefixed or differently cased) against the local
+// lowercase hex hash. An empty on-chain value never matches a
+// non-empty local hash.
+func skillHashMatches(onChain []byte, localHex string) bool {
+	chain := strings.ToLower(strings.TrimSpace(string(onChain)))
+	chain = strings.TrimPrefix(chain, "0x")
+	return chain == strings.ToLower(strings.TrimSpace(localHex))
+}
+
+// skillScoreString renders getSummary's fixed-point aggregate
+// (summaryValue × 10^-decimals) as a decimal string with exactly
+// `decimals` fractional digits. A nil value renders as "0".
+//
+// The conversion is done on the integer's decimal digits rather than
+// through big.Float, so the result is exact for any magnitude (a float
+// quotient is rounded to the mantissa precision and can misprint the
+// trailing digits of wide values).
+func skillScoreString(value *big.Int, decimals uint8) string {
+	if value == nil {
+		return "0"
+	}
+	if decimals == 0 {
+		return value.String()
+	}
+	digits := new(big.Int).Abs(value).String()
+	// Left-pad so at least one digit remains before the decimal point.
+	if pad := int(decimals) + 1 - len(digits); pad > 0 {
+		digits = strings.Repeat("0", pad) + digits
+	}
+	split := len(digits) - int(decimals)
+	out := digits[:split] + "." + digits[split:]
+	if value.Sign() < 0 {
+		out = "-" + out
+	}
+	return out
+}
diff --git a/cmd/obol/skills_test.go b/cmd/obol/skills_test.go
new file mode 100644
index 00000000..9de52e83
--- /dev/null
+++ b/cmd/obol/skills_test.go
@@ -0,0 +1,319 @@
+package main
+
+import (
+	"math/big"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/urfave/cli/v3"
+)
+
+func testSkillsCommand(t *testing.T) *cli.Command {
+	t.Helper()
+	return skillsCommand(newTestConfig(t))
+}
+
+// assertInt64FlagRequired covers *cli.Int64Flag, which the shared
+// assertFlagRequired helper doesn't (ERC-8004 tokenIds exceed int32).
+func assertInt64FlagRequired(t *testing.T, flags map[string]cli.Flag, name string) { + t.Helper() + f, ok := flags[name].(*cli.Int64Flag) + if !ok { + t.Fatalf("flag --%s is %T, want *cli.Int64Flag", name, flags[name]) + } + if !f.Required { + t.Errorf("flag --%s should be required", name) + } +} + +func TestSkillsCommand_Structure(t *testing.T) { + cmd := testSkillsCommand(t) + if cmd.Name != "skills" { + t.Fatalf("command name = %q, want skills", cmd.Name) + } + + calldata := findSubcommand(t, cmd, "calldata") + findSubcommand(t, calldata, "set-hash") + findSubcommand(t, calldata, "feedback") + findSubcommand(t, cmd, "reputation") + findSubcommand(t, cmd, "verify") +} + +func TestSkillsCalldataSetHash_Flags(t *testing.T) { + calldata := findSubcommand(t, testSkillsCommand(t), "calldata") + setHash := findSubcommand(t, calldata, "set-hash") + flags := flagMap(setHash) + + requireFlags(t, flags, "agent-id", "chain", "skill", "hash", "from-bundle") + assertStringDefault(t, flags, "chain", "base") + assertFlagHasAlias(t, flags, "from-bundle", "bundle") + assertInt64FlagRequired(t, flags, "agent-id") +} + +func TestSkillsCalldataFeedback_Flags(t *testing.T) { + calldata := findSubcommand(t, testSkillsCommand(t), "calldata") + feedback := findSubcommand(t, calldata, "feedback") + flags := flagMap(feedback) + + requireFlags(t, flags, "agent-id", "value", "chain", "skill", "endpoint", "feedback-uri", "feedback-hash") + assertStringDefault(t, flags, "chain", "base") + assertFlagHasAlias(t, flags, "feedback-uri", "uri") + assertFlagHasAlias(t, flags, "feedback-hash", "hash") + assertInt64FlagRequired(t, flags, "agent-id") + assertFlagRequired(t, flags, "value") +} + +func TestSkillsReputation_Flags(t *testing.T) { + reputation := findSubcommand(t, testSkillsCommand(t), "reputation") + flags := flagMap(reputation) + + requireFlags(t, flags, "agent-id", "chain", "skill", "raters") + assertStringDefault(t, flags, "chain", "base") + assertInt64FlagRequired(t, flags, 
"agent-id") + + if _, ok := flags["raters"].(*cli.StringSliceFlag); !ok { + t.Errorf("flag --raters is %T, want *cli.StringSliceFlag", flags["raters"]) + } +} + +func TestSkillsVerify_Flags(t *testing.T) { + verify := findSubcommand(t, testSkillsCommand(t), "verify") + flags := flagMap(verify) + + requireFlags(t, flags, "agent-id", "skill", "chain") + assertStringDefault(t, flags, "chain", "base") + assertInt64FlagRequired(t, flags, "agent-id") + assertFlagRequired(t, flags, "skill") +} + +// TestSkillsCalldataSetHash_PrintsCalldata runs the full command (no +// chain access — calldata building is pure) and checks the printer +// output carries the registry, the calldata, and the never-signs +// trailer. +func TestSkillsCalldataSetHash_PrintsCalldata(t *testing.T) { + out := captureStdout(t, func() error { + root := &cli.Command{Commands: []*cli.Command{skillsCommand(newTestConfig(t))}} + return root.Run(t.Context(), []string{ + "obol", "skills", "calldata", "set-hash", "quant-notes@0.1.0", + "--agent-id", "42", + "--chain", "base-sepolia", + "--hash", "0x" + strings.Repeat("ab", 32), + }) + }) + + for _, want := range []string{ + "skill.sha256:quant-notes@0.1.0", + "IdentityRegistry (base-sepolia): 0x8004A818BFB912233c491871b3d84c89A494BD9e", + "Calldata: 0x466648da", // setMetadata(uint256,string,bytes) selector + "NEVER signs", + } { + if !strings.Contains(out, want) { + t.Errorf("output missing %q\noutput:\n%s", want, out) + } + } +} + +func TestSkillsCalldataFeedback_PrintsTagsAndCalldata(t *testing.T) { + out := captureStdout(t, func() error { + root := &cli.Command{Commands: []*cli.Command{skillsCommand(newTestConfig(t))}} + return root.Run(t.Context(), []string{ + "obol", "skills", "calldata", "feedback", "quant-notes@0.1.0", + "--agent-id", "42", + "--value", "95", + "--chain", "base-sepolia", + }) + }) + + for _, want := range []string{ + "tag1: asr:skill", + "tag2: eip155:84532:0x8004a818bfb912233c491871b3d84c89a494bd9e:42:quant-notes@0.1.0", + 
"ReputationRegistry (base-sepolia): 0x8004B663056A597Dffe9eCcC1965A193B7388713", + "Calldata: 0x3c036a7e", // giveFeedback selector + "self-feedback", + } { + if !strings.Contains(out, want) { + t.Errorf("output missing %q\noutput:\n%s", want, out) + } + } +} + +func TestSkillsCalldataFeedback_RejectsOutOfRangeValue(t *testing.T) { + root := &cli.Command{Commands: []*cli.Command{skillsCommand(newTestConfig(t))}} + err := root.Run(t.Context(), []string{ + "obol", "skills", "calldata", "feedback", "x@1", "--agent-id", "1", "--value", "101", + }) + if err == nil || !strings.Contains(err.Error(), "0-100") { + t.Fatalf("err = %v, want 0-100 range error", err) + } +} + +func TestSkillsCalldataSetHash_HashSourceXOR(t *testing.T) { + run := func(args ...string) error { + root := &cli.Command{Commands: []*cli.Command{skillsCommand(newTestConfig(t))}} + full := append([]string{"obol", "skills", "calldata", "set-hash", "x@1", "--agent-id", "1"}, args...) + return root.Run(t.Context(), full) + } + + if err := run(); err == nil || !strings.Contains(err.Error(), "hash source required") { + t.Errorf("no source: err = %v, want hash-source error", err) + } + if err := run("--hash", strings.Repeat("ab", 32), "--from-bundle", "x.tar.gz"); err == nil || + !strings.Contains(err.Error(), "mutually exclusive") { + t.Errorf("both sources: err = %v, want mutual-exclusion error", err) + } +} + +// ── pure helper tests ─────────────────────────────────────────────────────── + +func TestParseSkillHashArg(t *testing.T) { + valid := strings.Repeat("ab", 32) + tests := []struct { + name string + in string + want string + wantErr bool + }{ + {name: "plain", in: valid, want: valid}, + {name: "0x prefix", in: "0x" + valid, want: valid}, + {name: "uppercase normalized", in: strings.ToUpper(valid), want: valid}, + {name: "whitespace trimmed", in: " " + valid + "\n", want: valid}, + {name: "too short", in: valid[:62], wantErr: true}, + {name: "non-hex", in: strings.Repeat("zz", 32), wantErr: true}, + 
{name: "empty", in: "", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseSkillHashArg(tt.in) + if tt.wantErr { + if err == nil { + t.Fatalf("parseSkillHashArg(%q) = %q, want error", tt.in, got) + } + return + } + if err != nil { + t.Fatal(err) + } + if got != tt.want { + t.Errorf("parseSkillHashArg(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestSkillHashMatches(t *testing.T) { + local := strings.Repeat("ab", 32) + tests := []struct { + name string + onChain string + want bool + }{ + {name: "exact", onChain: local, want: true}, + {name: "0x prefixed on chain", onChain: "0x" + local, want: true}, + {name: "uppercase on chain", onChain: strings.ToUpper(local), want: true}, + {name: "whitespace on chain", onChain: " " + local + "\n", want: true}, + {name: "mismatch", onChain: strings.Repeat("cd", 32), want: false}, + {name: "empty", onChain: "", want: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := skillHashMatches([]byte(tt.onChain), local); got != tt.want { + t.Errorf("skillHashMatches(%q) = %v, want %v", tt.onChain, got, tt.want) + } + }) + } +} + +func TestSkillScoreString(t *testing.T) { + tests := []struct { + name string + value *big.Int + decimals uint8 + want string + }{ + {name: "no scaling", value: big.NewInt(95), decimals: 0, want: "95"}, + {name: "two decimals", value: big.NewInt(9550), decimals: 2, want: "95.50"}, + {name: "zero", value: big.NewInt(0), decimals: 0, want: "0"}, + {name: "nil", value: nil, decimals: 2, want: "0"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := skillScoreString(tt.value, tt.decimals); got != tt.want { + t.Errorf("skillScoreString = %q, want %q", got, tt.want) + } + }) + } +} + +func TestParseRaterAddresses(t *testing.T) { + addrs, err := parseRaterAddresses([]string{ + "0x1111111111111111111111111111111111111111", + " 0x2222222222222222222222222222222222222222 ", 
+ "", + }) + if err != nil { + t.Fatal(err) + } + if len(addrs) != 2 { + t.Fatalf("len = %d, want 2 (empty entries skipped)", len(addrs)) + } + + if _, err := parseRaterAddresses([]string{"not-an-address"}); err == nil { + t.Error("invalid address should error") + } +} + +func TestSha256File(t *testing.T) { + p := filepath.Join(t.TempDir(), "bundle.tar.gz") + if err := os.WriteFile(p, []byte("abc"), 0o600); err != nil { + t.Fatal(err) + } + got, err := sha256File(p) + if err != nil { + t.Fatal(err) + } + // Well-known sha256("abc"). + if got != "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" { + t.Errorf("sha256File = %s", got) + } + + if _, err := sha256File(filepath.Join(t.TempDir(), "missing")); err == nil { + t.Error("missing file should error") + } +} + +// captureStdout redirects os.Stdout around fn — the calldata printers +// write with fmt.Printf, mirroring bountyFeedbackCommand. +func captureStdout(t *testing.T, fn func() error) string { + t.Helper() + old := os.Stdout + r, w, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + os.Stdout = w + defer func() { os.Stdout = old }() + + runErr := fn() + + _ = w.Close() + buf := make([]byte, 0, 4096) + chunk := make([]byte, 4096) + for { + n, readErr := r.Read(chunk) + buf = append(buf, chunk[:n]...) + if readErr != nil { + break + } + } + os.Stdout = old + + if runErr != nil { + t.Fatalf("command failed: %v", runErr) + } + return string(buf) +} diff --git a/cmd/obol/smoke.go b/cmd/obol/smoke.go new file mode 100644 index 00000000..d4356a47 --- /dev/null +++ b/cmd/obol/smoke.go @@ -0,0 +1,140 @@ +package main + +import ( + "context" + "fmt" + "regexp" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ethereum/go-ethereum/common" + "github.com/urfave/cli/v3" +) + +// smokeTestTag is the default validationResponse tag for smoke-test verdicts; +// it matches the erc8004 smoke-test request-hash domain. 
+const smokeTestTag = "obol/smoke-test/v1"
+
+// smokeBytes32Re accepts exactly "0x" followed by 64 hex digits (either
+// case): the shape of the report sha256 and of an explicit request-hash
+// override.
+var smokeBytes32Re = regexp.MustCompile(`^0x[0-9a-fA-F]{64}$`)
+
+// smokeCommand is the parent of the smoke-test agent's operator verbs.
+// Its only subcommand today is `calldata`, which derives ERC-8004
+// validationResponse calldata for a finished smoke run. The operator
+// submits that calldata with THEIR OWN wallet — neither the agent nor the
+// controller ever signs validation transactions (same stance as
+// `obol bounty eval calldata`).
+func smokeCommand(cfg *config.Config) *cli.Command {
+	subcommands := []*cli.Command{smokeCalldataCommand(cfg)}
+	return &cli.Command{
+		Name:     "smoke",
+		Usage:    "Smoke-test agent verbs: derive ERC-8004 verdict calldata for a run",
+		Commands: subcommands,
+	}
+}
+
+// smokeCalldataCommand builds the `calldata` verb: it collects the flag
+// values into a smokeCalldataInput, hands them to buildSmokeCalldata, and
+// prints the request hash, the registry address and the hex calldata to
+// stdout. The request hash comes from --target/--run-id unless the
+// optional --request-hash override is supplied (mirrors
+// `obol bounty eval calldata`).
+func smokeCalldataCommand(cfg *config.Config) *cli.Command {
+	flags := []cli.Flag{
+		&cli.StringFlag{Name: "target", Usage: "[REQUIRED] Smoke target base URL (normalized: trimmed, trailing slashes dropped)", Required: true},
+		&cli.StringFlag{Name: "run-id", Usage: "[REQUIRED] Run ID from the smoke report (results.json runId)", Required: true},
+		&cli.StringFlag{Name: "request-hash", Usage: "Explicit validation request hash (bytes32, 0x...) — overrides --target/--run-id derivation"},
+		&cli.IntFlag{Name: "response", Usage: "[REQUIRED] Verdict score 0-100 (results.json score100; the registry reverts above 100)", Required: true},
+		&cli.StringFlag{Name: "response-uri", Usage: "Commit-pinned GitHub permalink of the committed report.md"},
+		&cli.StringFlag{Name: "response-hash", Usage: "sha256 of the committed report.md bytes (0x + 64 hex; results.json reportSha256). Optional, zero allowed"},
+		&cli.StringFlag{Name: "tag", Usage: "Validation tag", Value: smokeTestTag},
+		&cli.StringFlag{Name: "network", Usage: "Chain", Value: "base-sepolia"},
+	}
+
+	run := func(ctx context.Context, cmd *cli.Command) error {
+		network := cmd.String("network")
+		out, err := buildSmokeCalldata(smokeCalldataInput{
+			Target:              cmd.String("target"),
+			RunID:               cmd.String("run-id"),
+			RequestHashOverride: cmd.String("request-hash"),
+			Response:            int(cmd.Int("response")),
+			ResponseURI:         cmd.String("response-uri"),
+			ResponseHash:        cmd.String("response-hash"),
+			Tag:                 cmd.String("tag"),
+			Network:             network,
+		})
+		if err != nil {
+			return err
+		}
+
+		fmt.Printf("Request hash: %s\n", out.RequestHash.Hex())
+		fmt.Printf("ValidationRegistry (%s): %s\n", network, out.Registry)
+		fmt.Printf("Calldata: 0x%x\n", out.Calldata)
+		fmt.Println("Submit with YOUR wallet (e.g. the agent remote-signer or cast send) — the smoke agent and the controller NEVER sign validation transactions.")
+		return nil
+	}
+
+	return &cli.Command{
+		Name:   "calldata",
+		Usage:  "Print ERC-8004 validationResponse calldata for a smoke run, for YOUR wallet to submit (the agent NEVER signs)",
+		Flags:  flags,
+		Action: run,
+	}
+}
+
+// smokeCalldataInput is the unvalidated, flag-level input to one calldata
+// derivation; each field holds the raw value of the like-named flag.
+type smokeCalldataInput struct {
+	Target              string
+	RunID               string
+	RequestHashOverride string
+	Response            int
+	ResponseURI         string
+	ResponseHash        string
+	Tag                 string
+	Network             string
+}
+
+// smokeCalldataResult is the derived submit-ready transaction material.
+type smokeCalldataResult struct { + RequestHash common.Hash + Registry string + Calldata []byte +} + +// buildSmokeCalldata validates the inputs and packs validationResponse +// calldata via the shared erc8004 encoder. Kept free of CLI plumbing so the +// golden test can pin the exact bytes. +func buildSmokeCalldata(in smokeCalldataInput) (smokeCalldataResult, error) { + if in.Response < 0 || in.Response > erc8004.MaxValidationResponse { + return smokeCalldataResult{}, fmt.Errorf("--response %d out of range 0-%d (the deployed registry reverts above %d; submit results.json score100, not score255)", + in.Response, erc8004.MaxValidationResponse, erc8004.MaxValidationResponse) + } + + requestHash := erc8004.SmokeTestRequestHash(in.Target, in.RunID) + if raw := strings.TrimSpace(in.RequestHashOverride); raw != "" { + if !smokeBytes32Re.MatchString(raw) { + return smokeCalldataResult{}, fmt.Errorf("--request-hash %q is not a bytes32 hex string (0x + 64 hex chars)", raw) + } + requestHash = common.HexToHash(raw) + } + + responseHash := common.Hash{} + if raw := strings.TrimSpace(in.ResponseHash); raw != "" { + if !smokeBytes32Re.MatchString(raw) { + return smokeCalldataResult{}, fmt.Errorf("--response-hash %q is not a sha256 hex string (0x + 64 hex chars)", raw) + } + responseHash = common.HexToHash(raw) + } + + registry, err := erc8004.ValidationRegistryAddress(in.Network) + if err != nil { + return smokeCalldataResult{}, err + } + + calldata, err := erc8004.EncodeValidationResponse( + requestHash, + uint8(in.Response), + in.ResponseURI, + responseHash, + in.Tag, + ) + if err != nil { + return smokeCalldataResult{}, err + } + + return smokeCalldataResult{RequestHash: requestHash, Registry: registry, Calldata: calldata}, nil +} diff --git a/cmd/obol/smoke_test.go b/cmd/obol/smoke_test.go new file mode 100644 index 00000000..1efe4e9b --- /dev/null +++ b/cmd/obol/smoke_test.go @@ -0,0 +1,225 @@ +package main + +import ( + "encoding/hex" + "strings" + "testing" + + 
"github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/urfave/cli/v3" +) + +// ───────────────────────────────────────────────────────────────────────────── +// Command structure (house style: sell_test.go) +// ───────────────────────────────────────────────────────────────────────────── + +func testSmokeCommand(t *testing.T) *cli.Command { + t.Helper() + return smokeCommand(&config.Config{}) +} + +func TestSmokeCalldataCommand_Flags(t *testing.T) { + calldata := findSubcommand(t, testSmokeCommand(t), "calldata") + flags := flagMap(calldata) + + requireFlags(t, flags, "target", "run-id", "request-hash", "response", "response-uri", "response-hash", "tag", "network") + assertFlagRequired(t, flags, "target") + assertFlagRequired(t, flags, "run-id") + assertFlagRequired(t, flags, "response") + assertStringDefault(t, flags, "network", "base-sepolia") + assertStringDefault(t, flags, "tag", "obol/smoke-test/v1") + + // --request-hash is an optional OVERRIDE (mirrors bounty eval calldata): + // the default derivation comes from --target/--run-id. 
+ if f, ok := flags["request-hash"].(*cli.StringFlag); !ok || f.Required { + t.Errorf("--request-hash must be an optional override (derive via --target/--run-id), got required=%v", ok && f.Required) + } + if f, ok := flags["response-hash"].(*cli.StringFlag); !ok || f.Required { + t.Errorf("--response-hash must be optional (zero responseHash is allowed), got required=%v", ok && f.Required) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Golden calldata +// ───────────────────────────────────────────────────────────────────────────── + +// TestBuildSmokeCalldata_Golden pins the full validationResponse calldata for +// fixed inputs: the 4-byte selector (validationResponse(bytes32,uint8,string, +// bytes32,string) == 0x3d659a96), the derived request hash (the erc8004 +// smoke golden vector), and the exact ABI-encoded bytes. Any drift here +// changes what operators submit on-chain, so the hex is hardcoded. +func TestBuildSmokeCalldata_Golden(t *testing.T) { + const ( + target = "http://obol.stack:8080" + runID = "20260101T000000Z-ab12cd" + responseURI = "https://github.com/example/obol-smoke-reports/blob/0011223344556677889900112233445566778899/reports/obol.stack-8080/20260101T000000Z-ab12cd.md" + responseHash = "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + + goldenRequestHash = "0x2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + goldenSelector = "3d659a96" + goldenCalldata = "3d659a96" + + "2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + + "0000000000000000000000000000000000000000000000000000000000000054" + + "00000000000000000000000000000000000000000000000000000000000000a0" + + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + + "0000000000000000000000000000000000000000000000000000000000000160" + + "000000000000000000000000000000000000000000000000000000000000008e" + + "68747470733a2f2f6769746875622e636f6d2f6578616d706c652f6f626f6c2d" + + 
"736d6f6b652d7265706f7274732f626c6f622f303031313232333334343535363" + + "637373838393930303131323233333434353536363737383839392f7265706f72" + + "74732f6f626f6c2e737461636b2d383038302f3230323630313031543030303030" + + "305a2d6162313263642e6d64000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000012" + + "6f626f6c2f736d6f6b652d746573742f76310000000000000000000000000000" + ) + + res, err := buildSmokeCalldata(smokeCalldataInput{ + Target: target, + RunID: runID, + Response: 84, + ResponseURI: responseURI, + ResponseHash: responseHash, + Tag: "obol/smoke-test/v1", + Network: "base-sepolia", + }) + if err != nil { + t.Fatalf("buildSmokeCalldata: %v", err) + } + + if res.RequestHash.Hex() != goldenRequestHash { + t.Errorf("request hash = %s, want %s", res.RequestHash.Hex(), goldenRequestHash) + } + if res.Registry != erc8004.ValidationRegistryV2BaseSepolia { + t.Errorf("registry = %s, want %s", res.Registry, erc8004.ValidationRegistryV2BaseSepolia) + } + + got := hex.EncodeToString(res.Calldata) + if !strings.HasPrefix(got, goldenSelector) { + t.Errorf("selector = 0x%s, want 0x%s (validationResponse)", got[:8], goldenSelector) + } + if got != goldenCalldata { + t.Errorf("calldata drifted:\n got 0x%s\nwant 0x%s", got, goldenCalldata) + } + + // Round-trip through the shared decoder: every field the operator submits + // must come back exactly. 
+ decoded, err := erc8004.DecodeValidationResponseCalldata(res.Calldata) + if err != nil { + t.Fatalf("DecodeValidationResponseCalldata: %v", err) + } + if decoded.RequestHash.Hex() != goldenRequestHash { + t.Errorf("decoded request hash = %s, want %s", decoded.RequestHash.Hex(), goldenRequestHash) + } + if decoded.Response != 84 { + t.Errorf("decoded response = %d, want 84", decoded.Response) + } + if decoded.ResponseURI != responseURI { + t.Errorf("decoded responseURI = %q, want %q", decoded.ResponseURI, responseURI) + } + if decoded.ResponseHash.Hex() != responseHash { + t.Errorf("decoded responseHash = %s, want %s", decoded.ResponseHash.Hex(), responseHash) + } + if decoded.Tag != "obol/smoke-test/v1" { + t.Errorf("decoded tag = %q, want obol/smoke-test/v1", decoded.Tag) + } +} + +// TestBuildSmokeCalldata_RequestHashOverride proves --request-hash wins over +// the --target/--run-id derivation, mirroring bounty eval calldata. +func TestBuildSmokeCalldata_RequestHashOverride(t *testing.T) { + const override = "0x1111111111111111111111111111111111111111111111111111111111111111" + + res, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + RequestHashOverride: override, + Response: 100, + Network: "base-sepolia", + }) + if err != nil { + t.Fatalf("buildSmokeCalldata: %v", err) + } + if res.RequestHash.Hex() != override { + t.Errorf("request hash = %s, want override %s", res.RequestHash.Hex(), override) + } + + if _, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + RequestHashOverride: "0x1234", + Response: 100, + Network: "base-sepolia", + }); err == nil { + t.Error("expected error for malformed --request-hash override") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Flag validation +// ───────────────────────────────────────────────────────────────────────────── + +func 
TestBuildSmokeCalldata_RejectsResponseOutOfRange(t *testing.T) { + base := smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Network: "base-sepolia", + } + + for _, response := range []int{-1, 101, 255} { + in := base + in.Response = response + if _, err := buildSmokeCalldata(in); err == nil { + t.Errorf("response %d: expected out-of-range error (registry reverts above %d)", response, erc8004.MaxValidationResponse) + } + } + + // Boundary values must pass. + for _, response := range []int{0, 100} { + in := base + in.Response = response + if _, err := buildSmokeCalldata(in); err != nil { + t.Errorf("response %d: unexpected error: %v", response, err) + } + } +} + +func TestBuildSmokeCalldata_RejectsMalformedResponseHash(t *testing.T) { + base := smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Response: 50, + Network: "base-sepolia", + } + + for _, malformed := range []string{ + "0x1234", // too short + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08", // missing 0x + "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00aZZ", // non-hex + "0x9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a0800", // too long + } { + in := base + in.ResponseHash = malformed + if _, err := buildSmokeCalldata(in); err == nil { + t.Errorf("response hash %q: expected malformed-hash error", malformed) + } + } + + // Empty response hash is explicitly allowed (zero responseHash per spec). 
+ in := base + in.ResponseHash = "" + if _, err := buildSmokeCalldata(in); err != nil { + t.Errorf("empty response hash should be allowed (zero hash): %v", err) + } +} + +func TestBuildSmokeCalldata_RejectsUnknownNetwork(t *testing.T) { + if _, err := buildSmokeCalldata(smokeCalldataInput{ + Target: "http://obol.stack:8080", + RunID: "20260101T000000Z-ab12cd", + Response: 50, + Network: "not-a-chain", + }); err == nil { + t.Error("expected error for a network without a verified validation registry deployment") + } +} diff --git a/cmd/x402-escrow/main.go b/cmd/x402-escrow/main.go new file mode 100644 index 00000000..1dc05d1e --- /dev/null +++ b/cmd/x402-escrow/main.go @@ -0,0 +1,144 @@ +// Command x402-escrow is the escrow facilitator for ServiceBounty rewards: +// it verifies and holds Permit2 batch-transfer vouchers (reserve), settles +// them on-chain via permitTransferFrom (capture), and drops them store-only +// (void — the voucher deadline is the hard on-chain guarantee). +// +// Configuration is environment-driven: +// +// OBOL_ESCROW_TOKEN bearer token for /escrow/* (empty = no auth, dev only) +// OBOL_ESCROW_STATE_DIR file-backed JSON state dir (default /data) +// OBOL_ESCROW_KEY hex private key for local settlement signing +// OBOL_ESCROW_SIGNER_URL remote-signer base URL (used when no key is set) +// OBOL_ESCROW_RPC_BASE per-network JSON-RPC base (default in-cluster eRPC) +// OBOL_ESCROW_NETWORKS csv chain aliases served (default base,base-sepolia) +// +// With neither key nor signer URL, capture returns 503 while reserve/void +// keep working (vouchers cannot be verified either — the spender binding has +// nothing to bind to). 
+package main + +import ( + "context" + "flag" + "log" + "net/http" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" +) + +type config struct { + Token string + StateDir string + KeyHex string + SignerURL string + RPCBase string + Networks []string +} + +// loadConfig resolves the environment with defaults; get is injectable for +// tests (pass os.Getenv in main). +func loadConfig(get func(string) string) config { + cfg := config{ + Token: get("OBOL_ESCROW_TOKEN"), + StateDir: get("OBOL_ESCROW_STATE_DIR"), + KeyHex: get("OBOL_ESCROW_KEY"), + SignerURL: get("OBOL_ESCROW_SIGNER_URL"), + RPCBase: get("OBOL_ESCROW_RPC_BASE"), + } + if cfg.StateDir == "" { + cfg.StateDir = "/data" + } + if cfg.RPCBase == "" { + cfg.RPCBase = erc8004.DefaultRPCBase + } + csv := get("OBOL_ESCROW_NETWORKS") + if csv == "" { + csv = "base,base-sepolia" + } + for _, part := range strings.Split(csv, ",") { + if n := strings.TrimSpace(part); n != "" { + cfg.Networks = append(cfg.Networks, n) + } + } + return cfg +} + +func main() { + listen := flag.String("listen", ":8403", "Listen address") + flag.Parse() + + cfg := loadConfig(os.Getenv) + + store, err := escrow.NewStore(cfg.StateDir) + if err != nil { + log.Fatalf("open state dir: %v", err) + } + + var spender common.Address + var submitter escrow.Submitter + switch { + case cfg.KeyHex != "": + key, err := crypto.HexToECDSA(strings.TrimPrefix(strings.TrimPrefix(cfg.KeyHex, "0x"), "0X")) + if err != nil { + log.Fatalf("parse OBOL_ESCROW_KEY: %v", err) + } + spender = crypto.PubkeyToAddress(key.PublicKey) + submitter = &escrow.EthSubmitter{RPCBase: cfg.RPCBase, Key: key} + log.Printf("settling locally as %s", spender.Hex()) + case cfg.SignerURL != "": + signer := erc8004.NewRemoteSigner(cfg.SignerURL) + ctx, cancel := 
context.WithTimeout(context.Background(), 30*time.Second) + addr, err := signer.GetAddress(ctx) + cancel() + if err != nil { + log.Fatalf("resolve remote signer address at %s: %v", cfg.SignerURL, err) + } + spender = addr + submitter = &escrow.EthSubmitter{RPCBase: cfg.RPCBase, Signer: signer, SignerAddress: addr} + log.Printf("settling via remote signer %s as %s", cfg.SignerURL, spender.Hex()) + default: + log.Printf("no OBOL_ESCROW_KEY or OBOL_ESCROW_SIGNER_URL: capture disabled, reserve/void still served") + } + if cfg.Token == "" { + log.Printf("warning: OBOL_ESCROW_TOKEN is empty — escrow routes are unauthenticated (mirrors HTTPGateway omitting the Authorization header)") + } + + srv := escrow.NewServer(store, escrow.ServerOptions{ + Token: cfg.Token, + Spender: spender, + Networks: cfg.Networks, + Submitter: submitter, + }) + + server := &http.Server{ + Addr: *listen, + Handler: srv.Handler(), + ReadHeaderTimeout: 10 * time.Second, + } + + go func() { + log.Printf("x402-escrow listening on %s (state: %s, networks: %s)", *listen, cfg.StateDir, strings.Join(cfg.Networks, ", ")) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("listen: %v", err) + } + }() + + stop := make(chan os.Signal, 1) + signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM) + <-stop + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if err := server.Shutdown(ctx); err != nil { + log.Printf("shutdown: %v", err) + } +} diff --git a/cmd/x402-escrow/main_test.go b/cmd/x402-escrow/main_test.go new file mode 100644 index 00000000..52098343 --- /dev/null +++ b/cmd/x402-escrow/main_test.go @@ -0,0 +1,47 @@ +package main + +import ( + "reflect" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/erc8004" +) + +func TestLoadConfig_Defaults(t *testing.T) { + cfg := loadConfig(func(string) string { return "" }) + + if cfg.StateDir != "/data" { + t.Errorf("StateDir = %q, want /data", cfg.StateDir) + } + if 
cfg.RPCBase != erc8004.DefaultRPCBase { + t.Errorf("RPCBase = %q, want %q", cfg.RPCBase, erc8004.DefaultRPCBase) + } + if !reflect.DeepEqual(cfg.Networks, []string{"base", "base-sepolia"}) { + t.Errorf("Networks = %v, want [base base-sepolia]", cfg.Networks) + } + if cfg.Token != "" || cfg.KeyHex != "" || cfg.SignerURL != "" { + t.Errorf("credentials should default empty: %+v", cfg) + } +} + +func TestLoadConfig_Overrides(t *testing.T) { + env := map[string]string{ + "OBOL_ESCROW_TOKEN": "tok", + "OBOL_ESCROW_STATE_DIR": "/var/lib/escrow", + "OBOL_ESCROW_KEY": "0xabc123", + "OBOL_ESCROW_SIGNER_URL": "http://remote-signer:9000", + "OBOL_ESCROW_RPC_BASE": "http://127.0.0.1:8545", + "OBOL_ESCROW_NETWORKS": " base-sepolia , , polygon ", + } + cfg := loadConfig(func(k string) string { return env[k] }) + + if cfg.Token != "tok" || cfg.StateDir != "/var/lib/escrow" || cfg.KeyHex != "0xabc123" { + t.Errorf("cfg = %+v", cfg) + } + if cfg.SignerURL != "http://remote-signer:9000" || cfg.RPCBase != "http://127.0.0.1:8545" { + t.Errorf("cfg = %+v", cfg) + } + if !reflect.DeepEqual(cfg.Networks, []string{"base-sepolia", "polygon"}) { + t.Errorf("Networks = %v, want trimmed csv with empties dropped", cfg.Networks) + } +} diff --git a/docs/guides/monetize-dataset.md b/docs/guides/monetize-dataset.md new file mode 100644 index 00000000..e7d33266 --- /dev/null +++ b/docs/guides/monetize-dataset.md @@ -0,0 +1,121 @@ +# Sell a dataset (and fine-tune on it) + +This guide takes a local dataset from raw bytes to a **versioned, content-addressed, +membership-gated product** that other obol-stacks discover, pay for, download +(verifying every byte), and fine-tune on — with provenance from the model back +to the exact dataset version. + +The dataset is **one artifact, two uses**: the same `sft.jsonl` is your local +fine-tune input *and* the bytes you sell. Nothing is re-exported. + +## 0. 
Prerequisites + +A dataset *bundle directory* containing a `manifest.json` (a content-address +`hash` + a `files` list) and a `*.jsonl` training artifact: + +``` +my-bundle/ + manifest.json {"hash":"", "files":["sft.jsonl"]} + sft.jsonl chat/instruction records, one JSON object per line +``` + +## 1. Anonymize (before anything leaves the host) + +```bash +SKILLS=${OBOL_SKILLS_DIR:-~/.config/obol/skills} +python3 "$SKILLS/dataset-anonymize/scripts/anonymize.py" \ + my-bundle/sft.jsonl my-bundle/sft.jsonl --report +``` + +The default regex redactor masks emails, IPs, keys, card/SSN-shaped numbers, +home paths, and phones into typed placeholders. For ML-grade detection set +`OBOL_ANONYMIZER_MODEL` to a Hugging Face token-classification PII model. See +the `dataset-anonymize` skill. + +## 2. Record a signed version + +```bash +obol dataset from my-bundle --name pi-sessions +``` + +This reads the bundle, computes the artifact's whole-file SHA-256, and appends +a **signed** `DatasetVersion` (v1) to the dataset's version log — chained to +its predecessor, signed by your owner key (the address buyers pin). Append a new +snapshot later with `obol dataset version pi-sessions --bundle my-bundle-v2`. + +Walk the chain offline at any time: + +```bash +obol dataset verify pi-sessions # rejects any reorder/tamper/middle-removal +``` + +## 3. Publish (host + tunnel + gate) + +```bash +obol dataset publish pi-sessions --membership invite +``` + +Starts the artifact server on your machine and a Cloudflare tunnel. **Bytes +never leave un-gated**: every `/dataset//download` requires a member token +*and* checks the token is entitled to the requested version. The server streams +with HTTP Range (resumable) and commits the whole-file hash on `200` and `206` +alike. + +Two ways a caller holds a member token: + +- **Pre-approved worker** — joins via device-auth; you run + `obol dataset approve `. Gets full (head) access. 
+- **Anonymous market buyer** — pays the priced offer; the edge x402 verifier + proves the settled payment, and the server mints a token scoped to exactly + the version paid for (`/join/paid`). Payment *is* the approval; the dataset + stays invisible to non-payers. + +Member tokens are persisted by hash, so paying members survive a host restart +without re-paying. + +## 4. Discovery (federated, no central hub) + +A priced dataset is a `type=dataset` `ServiceOffer`: it rides the existing +controller → route → payment-gate → catalog pipeline unchanged, and appears in +the seller's `/api/services.json` with its pinned version metadata +(`datasetManifestHash`, `datasetVersion`, `datasetSizeBytes`). The obol-router +federates that catalog across stacks **type-agnostically** — a dataset is just +another catalog entry — and the on-chain registration is indexed for +discovery. No central hub: each operator owns their dataset; discovery is the +union of everyone's catalogs. + +## 5. Buy — download + verify + +```bash +obol buy dataset https://<seller-host> --id pi-sessions --version 1 \ + --member-token <member-token> --out pi-sessions-v1.jsonl +``` + +The client streams the artifact (resuming from a `.part` if interrupted) and +recomputes the whole-file SHA-256, asserting it equals the server's +`X-Dataset-File-Hash`. A mismatch or a missing commitment **fails closed** — no +unverifiable file is ever finalized. + +## 6. Fine-tune (one contract, many backends) + +```bash +python3 "$SKILLS/finetune-backend/scripts/runner.py" \ + --backend unsloth \ + --dataset pi-sessions-v1.jsonl --base-model unsloth/Qwen2.5-0.5B \ + --manifest-hash <manifest-hash> --out ./run +``` + +Every backend (`mlx-lora`, `unsloth`, `axolotl`, `torchtune`, or `mock` for a +no-GPU contract check) reads the same JSONL. The runner writes +`run.manifest` binding `dataset_hash` to the exact version you bought — the +provenance link from a fine-tuned model back to its data. 
That is also the +deliverable shape the `finetune@v1` bounty task declares, so a standalone run +and a verified/bounty run stay consistent. + +## Invariants + +- Only the membership-gated route class is ever tunnel-exposed; dataset bytes + never leave the host without a valid, version-scoped member token. +- The version log is signed by the owner key and chained; verification is + offline and detects reorder/tamper. +- The controller never signs or holds a key; settlement is on-chain canonical. diff --git a/docs/guides/skill-marketplace.md b/docs/guides/skill-marketplace.md new file mode 100644 index 00000000..cf5e0673 --- /dev/null +++ b/docs/guides/skill-marketplace.md @@ -0,0 +1,330 @@ +# Skill Marketplace (v0) + +This guide walks you through selling a skill — a `SKILL.md` + scripts bundle, +the same shape as the skills shipped inside the `obol` binary — as a single +sellable and ratable unit, and through the buyer-side verification and +on-chain rating loop. + +A skill can be sold in two modes: + +- **SHARE** — the skill bundle itself is the product. A `type=skill` + ServiceOffer serves a hash-pinned `bundle.tar.gz` behind an x402 payment + gate; buyers pay per download. +- **SERVICE** — the skill stays private and buyers pay to *invoke* it: thin + sugar over the existing `type=agent` sell path, wrapping an agent that has + the skill installed. + +> [!IMPORTANT] +> The skill marketplace is alpha software (v0). If you encounter an issue, +> please open a [GitHub issue](https://github.com/ObolNetwork/obol-stack/issues). + +> [!NOTE] +> Rating and integrity ride ERC-8004 using a tag convention derived from the +> **ERC-8239 draft** ([ethereum/ERCs PR #1704](https://github.com/ethereum/ERCs/pull/1704)). +> ERC-8239 is an unmerged draft that we track; the obol interim form +> documented below may change if the final ERC diverges. See +> [The tag convention](#the-tag-convention-erc-8239-provenance). 
+ +## System Overview + +``` +SELLER (obol stack cluster) + + obol sell skill --> bundle ConfigMap (binaryData bundle.tar.gz, <=900000 B) + --> ServiceOffer CR (type=skill, spec.skill.sha256 pins bytes) + | + v + serviceoffer-controller: + - verifies sha256(ConfigMap bytes) == spec.skill.sha256 + - renders bundle server so--bundle (busybox httpd :8080) + - publishes /services//* behind x402 ForwardAuth + +BUYER + + 1. GET /services//bundle.tar.gz -> 402 + extra.skill.sha256 + 2. Pay (x402) -> download bundle.tar.gz -> sha256sum == extra.skill.sha256 + 3. obol skills verify -> compare against on-chain pin + 4. obol skills calldata feedback -> rate it (operator submits tx) +``` + +## Prerequisites + +- A running Obol Stack (`obol stack init && obol stack up`) +- A wallet address to receive payments (`--pay-to`) +- For the on-chain steps: an ERC-8004 identity (`obol sell register`) and a + wallet with gas on the target chain — calldata printed by the CLI is + **submitted by you, the operator**; no obol component ever signs or sends + these transactions + +--- + +## Part 1: Sell a skill bundle (SHARE mode) + +Sell one of the skills embedded in the `obol` binary, or any local skill +directory (must contain a top-level `SKILL.md`): + +```bash +# From an embedded skill +obol sell skill my-skill \ + --from-embedded gas \ + --skill-version 0.1.0 \ + --per-request 0.25 \ + --chain base-sepolia \ + --pay-to 0xYourWalletAddress + +# From a local directory +obol sell skill my-skill \ + --from ./path/to/skill-dir \ + --skill-version 0.1.0 \ + --display-name "My Skill" \ + --description "What the skill does" \ + --per-request 0.25 \ + --chain base-sepolia \ + --pay-to 0xYourWalletAddress +``` + +Notes: + +- `--from` and `--from-embedded` are mutually exclusive; both paths share one + deterministic packer. +- Skills are priced **per request** (one flat price per download) — there is + no `--per-mtok`/`--per-hour` for skills in v0. 
+- Card payments (`--pay-with`) are not offered on `sell skill` in v0. +- Registration is on by default; use `--no-register` for local/private flows. + +What the CLI does: + +1. Packs the directory into a **deterministic** gzipped tar: entries sorted, + USTAR format, normalized modes (0644/0755), zeroed timestamps/owners, + max-compression gzip with a zeroed header. Same source tree, same bytes, + same hash — every time. +2. Enforces the compressed-size cap (**900000 bytes** — the artifact rides a + ConfigMap) and computes the lowercase hex sha256 of the gzipped bytes. +3. Writes the bundle ConfigMap (server-side apply — client-side apply would + blow the 256KiB annotation cap for larger bundles) and the `type=skill` + ServiceOffer pinning that sha256 in `spec.skill.sha256`. + +Wait for the controller to converge: + +```bash +obol sell status my-skill -n default +``` + +The controller refuses to publish unless the ConfigMap bytes hash to +`spec.skill.sha256`. Skill-specific `UpstreamHealthy=False` reasons: +`BundleMissing`, `BundleTooLarge`, `BundleHashMismatch`, and +`InvalidSkillUpstream` (a skill offer may only advertise its own +controller-rendered bundle server `so--bundle`). Once `Ready=True`, +the offer also appears on the public catalog surfaces (`/skill.md`, +`/api/services.json`) with `type=skill`. + +The offer is replayed by `obol sell resume` / `obol stack up` after a host +reboot (the bundle ConfigMap is persisted alongside the offer manifest). + +## Part 2: Verify the 402 (buyer, before paying) + +An unpaid request returns the x402 payment requirements **plus the bundle's +identity and integrity hash** — for free: + +```bash +curl -s http://obol.stack:8080/services/my-skill/bundle.tar.gz | python3 -m json.tool +``` + +Look at `accepts[0].extra.skill`: + +```json +{ + "name": "gas", + "version": "0.1.0", + "sha256": "3f8e…64-hex…b21c" +} +``` + +This is the sha256 of the exact gzipped bytes the seller's controller +verified and serves. 
Record it before paying — it is what you will check the +download against. + +## Part 3: Buy the bundle + +Any x402-capable client works: probe, sign one payment authorization for the +advertised price, retry with the `X-PAYMENT` header, save the response body. +Two gated paths exist on the route, each costing one `perRequest` payment: + +- `/services//bundle.tar.gz` — the artifact (binary) +- `/services//skill.json` — metadata JSON (name, version, sha256, …) + +From an obol-stack agent, the `buy-x402` skill's `buy.py pay` performs the +one-shot paid request (probe → pre-sign one auth → send; max loss = one +request price): + +```bash +python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay \ + "http://traefik.traefik.svc.cluster.local/services/my-skill/skill.json" +``` + +> [!NOTE] +> `buy.py pay` prints the response body as text on stdout, so use it for +> `skill.json` (or to exercise the paid loop). For the binary +> `bundle.tar.gz`, use an x402 client that writes the raw response body to +> disk. From a pod, use the in-cluster Traefik address shown above; +> `obol.stack:8080` only resolves on the host. + +## Part 4: Verify the downloaded bundle + +```bash +# Hash must equal the 402-advertised extra.skill.sha256 +sha256sum bundle.tar.gz + +# Well-formedness: gzipped tar with a top-level SKILL.md +tar -tzf bundle.tar.gz | head +``` + +If the seller has pinned the hash on-chain (Part 5), verify against the +chain too — exits non-zero on mismatch or missing metadata: + +```bash +obol skills verify bundle.tar.gz \ + --agent-id \ + --skill gas@0.1.0 \ + --chain base-sepolia +``` + +## Part 5: Pin the hash on-chain (seller operator) + +Pin the bundle hash in the ERC-8004 Identity Registry under the metadata key +`skill.sha256:@` (the value is the 64-char ASCII lowercase hex, +explorer-friendly). 
The CLI prints the target contract and calldata; **you +submit it with your own wallet** — the controller and agents never sign: + +```bash +obol skills calldata set-hash \ + --agent-id \ + --skill gas@0.1.0 \ + --bundle bundle.tar.gz \ + --chain base-sepolia +# IdentityRegistry (base-sepolia): 0x… +# Calldata: 0x… +``` + +Submit with any wallet that owns the agent (example with foundry's `cast`; +never paste a private key into a shared shell or commit it anywhere): + +```bash +cast send \ + --rpc-url --private-key "$YOUR_OPERATOR_KEY" +``` + +Because the canonical packer is deterministic, republishing the same skill +source yields the same hash — the on-chain pin stays valid across offer +re-creation until the skill content actually changes (then bump `--skill-version` +and pin the new ref). + +## Part 6: Rate a skill (buyer operator) + +Feedback rides the ERC-8004 Reputation Registry with the skill tag pair, so +ratings are queryable per skill-version, not just per agent: + +```bash +obol skills calldata feedback \ + --agent-id \ + --skill gas@0.1.0 \ + --value 92 \ + --chain base-sepolia +# ReputationRegistry (base-sepolia): 0x… +# Calldata: 0x… +``` + +Submit the printed calldata with **your own** wallet (same operator-submits +rule as Part 5). Self-feedback from the agent's owner wallet reverts +on-chain. This is the same calldata-printer pattern as the bounty/evaluator +feedback path (`obol bounty feedback`), which writes verdict-derived scores +to the same Reputation Registry. 
+ +Read the aggregate back: + +```bash +obol skills reputation \ + --agent-id \ + --skill gas@0.1.0 \ + --chain base-sepolia \ + [--raters 0xAddr1,0xAddr2] # optional whitelist; default: all raters +``` + +## Selling execution instead of bytes + +When buyers should pay to *use* the skill rather than own a copy, sell the +agent that has the skill installed — through the existing agent path, with +no skill-specific flag: + +```bash +obol agent new quant --skills gas,addresses --model --create-wallet +obol sell agent quant --price 0.001 --chain base-sepolia +``` + +The offer is `type=agent`; the 402 surfaces `extra.agentModel`/ +`extra.agentSkills` and buyers call the agent's OpenAI-compatible endpoint +(`/services//v1/chat/completions`) — prefer `stream: true` for long +generations through a quick tunnel. In short: `obol sell skill` sells the +bundle bytes, `obol sell agent` sells execution. + +## Agents self-publishing skills + +A running agent can publish one of its own skills without the host CLI by +creating the bundle ConfigMap + ServiceOffer directly — its ConfigMap write +RBAC is namespace-scoped (`hermes-skill-publish` Role in +`hermes-obol-agent`), so **both objects must live in the agent's own +namespace**. The full raw-K8s recipe (canonical packing rules, ConfigMap + +ServiceOffer YAML, condition checks) lives in the embedded `sell` skill: +"Selling a Skill Bundle (type=skill)". The on-chain steps (Parts 5-6) remain +operator-only: agents surface the printed calldata, humans submit it. 
+ +## The tag convention (ERC-8239 provenance) + +Skill feedback uses the two ERC-8004 feedback tags as follows: + +| Tag | Value | Example | +|-----|-------|---------| +| `tag1` | `asr:skill` (constant) | `asr:skill` | +| `tag2` | `eip155:<chainId>:<identityRegistry>:<agentId>:<name>@<version>` | `eip155:84532:0x8004a818…:42:gas@0.1.0` | + +Normalization rules (chosen for determinism): + +- `<identityRegistry>` is the lowercase hex Identity Registry address of + the chain the offer pays on +- `<agentId>` is the seller's ERC-8004 token id in decimal +- `<name>@<version>` is the skill ref from `spec.skill` (neither part + may contain `:`) + +This is the **obol interim form** of the tag2 scheme proposed in the +ERC-8239 draft ([ethereum/ERCs PR #1704](https://github.com/ethereum/ERCs/pull/1704)). +The draft is unmerged; we track it and will migrate if the merged form +differs. The matching on-chain integrity pin uses the Identity Registry +metadata key `skill.sha256:<name>@<version>` with the ASCII lowercase +hex sha256 as the value. + +## Limits & caveats (v0) + +- **900000-byte compressed cap** — the artifact rides a ConfigMap. Larger + skills need trimming (or wait for a future artifact backend). +- **Per-request pricing only**; single-shot x402 pay (no buyer sidecar, no + pre-authorized pools) — exactly right for a one-shot download. +- **No card payments** on `sell skill` in v0. +- **Every gated path costs one payment** — `skill.json` included. Use the + free 402 `extra.skill` for pre-purchase checks. +- **Hash semantics**: the binding contract is `sha256(served bytes) == + spec.skill.sha256 == extra.skill.sha256`. Cross-implementation + reproducibility (re-pack from source → same hash) holds only for the + canonical packer; gzip output is implementation-specific. +- **Calldata is operator-submitted.** `obol skills calldata …` and + `obol sell register` print transactions; you sign and send them with your + own wallet. The serviceoffer-controller and agents never sign. 
+- Quick-tunnel hostnames change on restart; registration documents re-render + on the next reconcile. + +## Related + +- `flows/flow-19-skill-sale.sh` — end-to-end smoke for this guide +- [How to Monetize Your Inference](./monetize-inference.md) — the underlying + sell/x402/registration machinery +- Embedded skills: `sell` ("Selling a Skill Bundle (type=skill)"), + `monetize-guide`, `buy-x402` diff --git a/docs/guides/smoke-test-agent.md b/docs/guides/smoke-test-agent.md new file mode 100644 index 00000000..28b8ae34 --- /dev/null +++ b/docs/guides/smoke-test-agent.md @@ -0,0 +1,301 @@ +# Selling a Smoke-Test Agent + +This guide walks you (the **seller/operator**) through provisioning, selling, +and operating the **smoke-test agent**: a payment-gated agent that buyers hire +per run to health-check the public surface of an Obol Stack deployment. + +For each paid run, the agent: + +1. **Probes** a buyer-supplied target stack URL — strictly **read-only** GETs + against the published public routes (`/skill.md`, `/api/services.json`, + each advertised `/services//*` 402 challenge, and the informational + `/.well-known/agent-registration.json`). It never sends an `X-PAYMENT` + header and never writes anything to the target. +2. **Writes a report** — `report.md` (the canonical committed bytes) and + `results.json` (machine-readable scores) in its workspace. +3. **Commits the report** to a **seller-owned public GitHub repo** at + `reports//.md` and streams the buyer the + `results.json` plus a commit-pinned permalink. +4. Leaves you with everything needed to **submit an ERC-8004 + ValidationRegistry verdict** from your own wallet via + `obol smoke calldata`. The agent and the controller never sign on-chain + transactions — same stance as the bounty pipeline. + +> [!IMPORTANT] +> The monetize subsystem is alpha software. If you encounter an issue, please +> open a [GitHub issue](https://github.com/ObolNetwork/obol-stack/issues). 
+ +> [!WARNING] +> The buyer drives a prompt-injectable agent that holds a GitHub token in its +> environment. Scope that token to **one public report repo, contents +> read/write, nothing else** (see [Step 2](#step-2--create-the-github-secret) +> and [Production guidance](#production-guidance)). The accepted v0 blast +> radius is "an attacker can write junk to the one public report repo" — +> nothing more. + +## System overview + +``` +BUYER (any x402 wallet) SELLER (your obol stack) + +buy.py pay-agent ── x402 payment ──> Traefik /services/smoke-tester/* + "smoke-test " └─> x402-verifier ─> Hermes agent + │ smoke-test skill + │ + read-only GETs (no X-PAYMENT, ever) ▼ +TARGET stack public surface <────────────────────── smoke.py probe + /skill.md smoke.py post ──> GitHub + /api/services.json reports//.md + /services//* (expect 402) + /.well-known/agent-registration.json (informational) + +OPERATOR (you, out of band) + obol smoke calldata ──> validationResponse calldata ──> cast send (YOUR wallet) + ERC-8004 ValidationRegistry +``` + +## Prerequisites + +- A running Obol Stack (`obol stack up`) with the Cloudflare tunnel active so + `/services/*` is publicly reachable. +- A **public** GitHub repository you own for reports (e.g. + `/stack-smoke-reports`). +- A GitHub credential scoped to that repo only (see Step 2). +- For the on-chain verdict: a wallet with ETH for gas on the target chain + (default `base-sepolia`). + +## Step 1 — Provision the agent + +Declare the agent with the `smoke-test` skill. No `--create-wallet` is needed +for v0: the agent never signs anything; you submit the verdict from your own +wallet. + +```bash +obol agent new smoke-tester \ + --skills smoke-test \ + --objective "You are a smoke-test agent. When a buyer says 'smoke-test ', run the smoke-test skill: probe the target read-only, then post the report, then reply with results.json and the permalink." 
+``` + +This creates an Agent CR in namespace `agent-smoke-tester`; the +serviceoffer-controller provisions a Hermes runtime with the skill mounted at +`/data/.hermes/obol-skills/smoke-test/`. + +## Step 2 — Create the GitHub Secret + +The agent reads `GITHUB_TOKEN` and `GITHUB_REPORT_REPO` from its environment. +Both ride the **existing `hermes-env` Secret** — the runtime-env-override hook +every CRD agent already mounts (`envFrom`, optional). Do **not** invent a new +Secret name: `hermes-env` is the one whitelisted by the admission policy and +RBAC. + +Create a **fine-grained personal access token** (GitHub → Settings → +Developer settings → Fine-grained tokens): + +- **Repository access**: only the report repo (e.g. + `/stack-smoke-reports`). +- **Permissions**: Contents → Read and write. Nothing else. +- **Expiration**: short (30–90 days) and rotate. + +Then create the Secret and restart the agent (the Deployment's checksum +annotation only covers `hermes-config`, so a Secret change needs an explicit +restart): + +```bash +obol kubectl -n agent-smoke-tester create secret generic hermes-env \ + --from-literal=GITHUB_TOKEN=github_pat_XXXXXXXXXXXXXXXXXXXXXX \ + --from-literal=GITHUB_REPORT_REPO=/stack-smoke-reports \ + --dry-run=client -o yaml | obol kubectl apply -f - + +obol kubectl -n agent-smoke-tester rollout restart deploy/hermes +``` + +> [!CAUTION] +> The token lives **only** in the Secret's data. Never put it in the Agent +> CR spec, annotations, labels, status, or any file the agent commits. +> Explicit `env` entries on the Hermes container (e.g. `API_SERVER_KEY`, +> `REMOTE_SIGNER_TOKEN`) always take precedence over `envFrom`, so +> `hermes-env` cannot clobber the runtime's own credentials. + +To rotate: re-run the same two commands with the new token. 
+ +## Step 3 — Sell it + +```bash +obol sell agent smoke-tester \ + --price 0.05 \ + --token USDC \ + --chain base-sepolia \ + --pay-to 0xYourRevenueWallet \ + --description "Paid smoke test: read-only probe of an Obol Stack public surface, report committed to a public GitHub repo" +``` + +This wraps the agent in a `type=agent` ServiceOffer. Check progress with +`obol sell status smoke-tester -n agent-smoke-tester`; once +`UpstreamHealthy`, `PaymentGateReady`, and `RoutePublished` are `True`, the +agent is purchasable at `/services/smoke-tester/v1/chat/completions` on your +tunnel hostname. + +## Step 4 — Buyer journey + +The buyer pays per run with the `buy-x402` skill's one-shot streaming call. +From any buyer agent pod: + +```bash +# 1. Discover pricing + the agent model id (extra.agentModel in the 402 body) +python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py probe \ + https://<seller-tunnel-host>/services/smoke-tester/v1/chat/completions --type agent + +# 2. Pay for one run (streaming; agent runs can be slow, prefer pay-agent) +python3 ${OBOL_SKILLS_DIR:-/data/.openclaw/skills}/buy-x402/scripts/buy.py pay-agent \ + https://<seller-tunnel-host>/services/smoke-tester/v1/chat/completions \ + --model "<agent-model-id>" \ + --message 'smoke-test https://target-stack.example.com' +``` + +The message contract is `smoke-test <target-url>`. The agent generates a +run id of the form `<UTC-timestamp>-<6 hex>` (a buyer may suggest one +in the message; it must match `^[A-Za-z0-9._-]+$`). The streamed reply +contains the full `results.json` — including `passed`/`total`, `score255`, +`score100`, `reportSha256` — and the commit-pinned GitHub permalink. + +Note for buyers: the report lands in the **seller's** public repo, so the +result is publicly auditable but the buyer needs no GitHub credentials. The +buyer's verification path is: fetch the permalink, check +`sha256(report bytes) == reportSha256`, and (once submitted) check the +on-chain validation entry. 
+ +## Step 5 — Where reports live + +In the seller-owned report repo: + +| Path | Content | +|---|---| +| `reports/<target-host>/<run-id>.md` | The canonical per-run report (committed bytes are what `reportSha256` covers) | +| `reports/<target-host>/latest.md` | Best-effort pointer: run id, score line, permalink of the latest run | + +`<target-host>` is the lowercase target hostname with `:` rewritten to +`-` (e.g. `obol.stack:8080` → `obol.stack-8080`). The permalink the +buyer receives is commit-pinned +(`https://github.com/<owner>/<repo>/blob/<commit-sha>/reports/...`), so later +runs can never silently rewrite what the buyer was shown. + +Each run performs at most **two** repo writes: one commit for the report, +one best-effort commit for `latest.md`. + +## Step 6 — Submit the on-chain verdict + +The run's identity on-chain is: + +``` +requestHash = keccak256("obol/smoke-test/v1|<target>|<run-id>") +``` + +with the target normalized exactly like the report (`strip()` whitespace, +strip trailing `/`). `results.json` deliberately does **not** contain the +request hash (the agent pod has no keccak256); `obol smoke calldata` derives +it for you: + +```bash +obol smoke calldata \ + --target https://target-stack.example.com \ + --run-id 20260612T093000Z-3fa9c2 \ + --response 87 \ + --response-uri "https://github.com/<owner>/stack-smoke-reports/blob/<commit-sha>/reports/target-stack.example.com/20260612T093000Z-3fa9c2.md" \ + --response-hash 0x<reportSha256> \ + --network base-sepolia +``` + +Flag-to-report mapping: + +| Flag | Source | +|---|---| +| `--target`, `--run-id` | `results.json` `target` + `runId` (the same normalized values) | +| `--response` | `results.json` **`score100`** — the on-chain value. 
The deployed registry reverts above 100, so `score255` stays an off-chain field | +| `--response-uri` | the commit-pinned permalink | +| `--response-hash` | `0x` + `results.json` `reportSha256` (sha256 of the committed `report.md` bytes; optional, zero allowed) | + +The command prints the request hash, the ValidationRegistry address for the +chosen network, and the ready-to-submit `validationResponse` calldata +(selector `0x3d659a96`). Submit it with **your own wallet** — never the +agent's: + +```bash +cast send \ + --rpc-url \ + --private-key "$OPERATOR_KEY" +``` + +(Use an environment variable or a hardware/keystore signer — never paste a +private key inline.) + +Anyone can then independently verify the verdict: recompute the request hash +from the public target + run id, fetch the permalink, and check +`sha256(report.md bytes)` against the submitted `responseHash`. + +## Production guidance + +> [!IMPORTANT] +> Read this section before selling runs for real money. It captures the v0 +> trust model and the GitHub operational limits. + +### Prefer GitHub App installation tokens over PATs + +For production, replace the fine-grained PAT with a **GitHub App installation +token**: + +- **Short-lived**: installation tokens expire after ~1 hour, so a leaked + token (the realistic failure mode for a prompt-injected agent) has a small + window. PATs live until rotated. +- **Per-repo by installation**: install the App on only the report repo; + the token cannot be over-scoped by mistake. +- **Higher, separately-bucketed rate limits** than user PATs. + +The trade-off is operational: something must mint a fresh installation token +and refresh the `hermes-env` Secret (`GITHUB_TOKEN`) on a schedule (e.g. a +host-side cron re-running the Step 2 commands). The agent contract is +unchanged — it just reads `GITHUB_TOKEN` from env. + +### v0 trust model: seller-owned repo only + +v0 posts to the **seller-owned public report repo**. 
There is deliberately +**no buyer token handoff** — a buyer cannot ask the agent to commit into a +buyer-owned repo, and the agent must never accept credentials passed through +chat. Buyer-repo delivery is explicitly out of scope for v0 and is planned as +a v1 feature with a proper credential channel. If a buyer needs a copy, the +report is public — mirror the permalink. + +### GitHub rate limits and acceptable use + +The posting script is built to stay well inside GitHub's +[acceptable use](https://docs.github.com/en/site-policy/acceptable-use-policies) +and secondary rate limits, and you should keep it that way: + +- **Batch: one report commit per run** (plus one best-effort `latest.md` + write) — never per-check or per-probe commits. +- Content writes are the expensive, secondary-rate-limited operation on + GitHub's side; the script honors `Retry-After` (falling back to + `x-ratelimit-reset`) on 403/429, retries 5xx with short exponential + backoff, and gives up within a bounded budget rather than hammering. +- If you operate many sellers against one report repo, expect concurrent- + write 409s (the script re-fetches the blob sha and retries once); beyond + light contention, shard by repo. +- A failed post never loses the run: the report stays in the agent workspace + and `post` is re-runnable. + +### Blast radius recap + +- The smoke agent **never signs or settles anything** — probe-only buyer + side, no `X-PAYMENT`; the operator submits the validation transaction. +- The GitHub token is the only credential it holds; with the scoping above, + the worst case from a hostile buyer prompt is junk commits in one public + report repo. Rotate the token and clean up the repo history if it happens. 
+ +## CI / smoke coverage + +`flows/flow-20-smoke-agent.sh` gates this feature: it compiles the skill +scripts, runs a probe-only self-smoke against the local stack's own public +catalog surface (validating `report.md`/`results.json` and the +`reportSha256` binding), exercises GitHub posting **only** when +`GITHUB_TOKEN` + `GITHUB_REPORT_REPO` are exported (explicit SKIP otherwise, +so CI never needs GitHub), and asserts `obol smoke calldata` emits +`validationResponse` calldata with selector `0x3d659a96`. diff --git a/flows/flow-12-obol-payment.sh b/flows/flow-12-obol-payment.sh index 9b9af25f..3a2a8d77 100755 --- a/flows/flow-12-obol-payment.sh +++ b/flows/flow-12-obol-payment.sh @@ -63,7 +63,10 @@ ARTIFACT_DIR="${FLOW12_ARTIFACT_DIR:-$OBOL_ROOT/.tmp/flow-12-$(date +%Y%m%d-%H%M mkdir -p "$ARTIFACT_DIR" LOG="$ARTIFACT_DIR/test-output.log" set +e -go test -tags integration -v \ +# -count=1 forbids the Go test cache: this test's prerequisites (Ollama +# models, cluster state, facilitator image) live outside the build graph, so +# a cached result silently replays a stale verdict. +go test -tags integration -count=1 -v \ -run '^TestIntegration_SellBuySidecar_OBOLPermit2$' \ -timeout "${FLOW12_TIMEOUT:-30m}" \ ./internal/openclaw/ 2>&1 | tee "$LOG" diff --git a/flows/flow-19-skill-sale.sh b/flows/flow-19-skill-sale.sh new file mode 100755 index 00000000..fb669632 --- /dev/null +++ b/flows/flow-19-skill-sale.sh @@ -0,0 +1,389 @@ +#!/bin/bash +# Flow 19: Skill Sale — type=skill ServiceOffer (skill marketplace v0). +# +# Sells an embedded skill as a paid, hash-pinned bundle download and checks +# the integrity surfaces end to end: +# +# 1. Build obol, confirm the `obol sell skill` / `obol skills` CLI surfaces +# exist (skip cleanly when this branch predates them). +# 2. `obol sell skill <name> --from-embedded <skill>` → bundle ConfigMap + +# type=skill ServiceOffer; controller renders the so-<name>-bundle +# busybox httpd and gates /services/<name>/* behind x402. +# 3. 
Assert the bundle ConfigMap bytes hash to spec.skill.sha256 and stay +# under the 900000-byte compressed cap. +# 4. Unpaid probe → 402 with accepts[0].extra.skill {name,version,sha256}; +# sha256 must equal the ConfigMap bytes hash (pre-purchase integrity). +# 5. sha256-verify the served bundle artifact and check it is a well-formed +# gzipped tar with a top-level SKILL.md. +# 6. (Gated: FLOW19_PAID_FETCH=true + funded agent wallet) one-shot paid +# fetch via the buy-x402 skill's buy.py pay. +# 7. Derive ERC-8004 set-hash + feedback (tag1=asr:skill) calldata via +# `obol skills calldata` and assert non-empty, deterministic hex. The +# calldata is OPERATOR-submitted — this flow never signs or sends a tx. +# +# Requires for the cluster section: flows 01-02 (running stack). The calldata +# section runs without a cluster. Cleanup is opt-in via FLOW_CLEANUP=1. +source "$(dirname "$0")/lib.sh" + +SKILL_NAME="${FLOW19_SKILL:-gas}" +SKILL_VERSION="${FLOW19_SKILL_VERSION:-0.1.0}" +OFFER_NAME="${FLOW19_OFFER_NAME:-flow19-${SKILL_NAME}}" +OFFER_NS="${FLOW19_NS:-default}" +PRICE="${FLOW19_PRICE:-0.001}" +FLOW19_CHAIN="${FLOW19_CHAIN:-base-sepolia}" +# Deterministic Hardhat/Anvil test address (account #1) — public test +# constant, same address lib.sh derives from the well-known mnemonic. 
+PAY_TO="${FLOW19_PAY_TO:-${SELLER_WALLET:-0x70997970C51812dc3A010C7d01b50e0d17dc79C8}}"
+FEEDBACK_AGENT_ID="${FLOW19_AGENT_ID:-1}"
+FEEDBACK_VALUE="${FLOW19_FEEDBACK_VALUE:-95}"
+# Filled in by the cluster section (or by the local fallback packer).
+BUNDLE_FILE=""
+CM_SHA=""
+
+# sha256_file PATH — print the lowercase hex sha256 of the file's bytes.
+# Goes through python3 (already a hard requirement of this flow) so the
+# digest does not depend on which sha256 CLI the host happens to ship.
+sha256_file() {
+  python3 - "$1" <<'PY'
+import hashlib
+import sys
+
+with open(sys.argv[1], "rb") as fh:
+    digest = hashlib.sha256(fh.read()).hexdigest()
+print(digest)
+PY
+}
+
+# §0: Prerequisites + build
+step "required local tools are available"
+require_tool python3
+pass "required tools found"
+
+step "build obol CLI"
+if [ "${FLOW19_SKIP_BUILD:-false}" = "true" ] && [ -x "$OBOL" ]; then
+  skip "FLOW19_SKIP_BUILD=true — using existing $OBOL"
+elif ! command -v go >/dev/null 2>&1; then
+  # No toolchain: fall back to a prebuilt binary, otherwise give up early.
+  if [ -x "$OBOL" ]; then
+    skip "go not on PATH — using existing $OBOL"
+  else
+    fail "no go toolchain and no prebuilt obol at $OBOL"
+    emit_metrics
+    exit 1
+  fi
+elif build_out=$(cd "$OBOL_ROOT" && go build -o "$OBOL_BIN_DIR/obol" ./cmd/obol 2>&1); then
+  pass "built $OBOL_BIN_DIR/obol"
+else
+  # A broken build makes every later step meaningless — report and stop.
+  fail "go build failed — ${build_out:0:300}"
+  emit_metrics
+  exit 1
+fi
+
+# §1: CLI surface gates — skip cleanly on branches that predate the
+# skill-marketplace CLI instead of failing the whole flow.
+HAVE_SELL_SKILL=0 +step "obol sell skill subcommand present" +sell_help=$("$OBOL" sell --help 2>&1 || true) +if echo "$sell_help" | grep -qE '^[[:space:]]+skill([[:space:],]|$)'; then + HAVE_SELL_SKILL=1 + pass "obol sell skill is available" +else + skip "obol sell skill not in this build — skipping sell/cluster section" +fi + +HAVE_SKILLS_CMD=0 +step "obol skills command group present" +root_help=$("$OBOL" --help 2>&1 || true) +if echo "$root_help" | grep -qE '^[[:space:]]+skills([[:space:],]|$)'; then + HAVE_SKILLS_CMD=1 + pass "obol skills is available" +else + skip "obol skills not in this build — skipping calldata section" +fi + +# §2: Cluster gate +CLUSTER_OK=0 +step "local stack reachable" +if [ -f "$OBOL_CONFIG_DIR/.stack-id" ] && [ -f "$OBOL_CONFIG_DIR/kubeconfig.yaml" ] \ + && "$OBOL" kubectl cluster-info >/dev/null 2>&1; then + CLUSTER_OK=1 + pass "cluster reachable via $OBOL_CONFIG_DIR/kubeconfig.yaml" +else + skip "no running local stack — skipping sell/cluster section" +fi + +if [ "$HAVE_SELL_SKILL" = "1" ] && [ "$CLUSTER_OK" = "1" ]; then + # §3: Sell the embedded skill + step "obol sell skill $OFFER_NAME --from-embedded $SKILL_NAME" + sell_out=$("$OBOL" sell skill "$OFFER_NAME" \ + --from-embedded "$SKILL_NAME" \ + --skill-version "$SKILL_VERSION" \ + --per-request "$PRICE" \ + --chain "$FLOW19_CHAIN" \ + --pay-to "$PAY_TO" \ + --no-register \ + -n "$OFFER_NS" 2>&1) && sell_rc=0 || sell_rc=$? 
+ if [ "${sell_rc:-1}" -eq 0 ]; then + pass "sell skill accepted" + else + fail "obol sell skill exited $sell_rc — ${sell_out:0:300}" + fi + + # §3.1: ServiceOffer landed with type=skill + spec.skill block + step "ServiceOffer $OFFER_NAME has spec.type=skill and spec.skill" + so_type=$("$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{.spec.type}' 2>/dev/null || true) + SPEC_SHA=$("$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{.spec.skill.sha256}' 2>/dev/null || true) + SPEC_VERSION=$("$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{.spec.skill.version}' 2>/dev/null || true) + BUNDLE_CM=$("$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{.spec.skill.bundleConfigMap}' 2>/dev/null || true) + if [ "$so_type" = "skill" ] && echo "$SPEC_SHA" | grep -qE '^[a-f0-9]{64}$' \ + && [ "$SPEC_VERSION" = "$SKILL_VERSION" ] && [ -n "$BUNDLE_CM" ]; then + pass "spec.skill: version=$SPEC_VERSION cm=$BUNDLE_CM sha256=${SPEC_SHA:0:12}…" + else + fail "spec mismatch: type=$so_type version=$SPEC_VERSION cm=$BUNDLE_CM sha=$SPEC_SHA" + fi + + # §3.2: Bundle ConfigMap bytes — size cap + hash pin. These are the exact + # bytes the controller hash-verifies and the bundle server serves; a + # mismatch here means the controller must refuse to publish + # (BundleHashMismatch). 
+ step "bundle ConfigMap bytes hash to spec.skill.sha256 (<=900000 bytes)" + BUNDLE_FILE="$(mktemp -t flow19-bundle-XXXXXX).tar.gz" + "$OBOL" kubectl get configmap "$BUNDLE_CM" -n "$OFFER_NS" \ + -o jsonpath='{.binaryData.bundle\.tar\.gz}' 2>/dev/null \ + | python3 -c 'import base64,sys; sys.stdout.buffer.write(base64.b64decode(sys.stdin.read()))' \ + > "$BUNDLE_FILE" || true + bundle_size=$(wc -c < "$BUNDLE_FILE" | tr -d ' ') + CM_SHA=$(sha256_file "$BUNDLE_FILE" 2>/dev/null || true) + if [ "${bundle_size:-0}" -gt 0 ] && [ "${bundle_size:-0}" -le 900000 ] \ + && [ "$CM_SHA" = "$SPEC_SHA" ]; then + pass "bundle $bundle_size bytes, sha256 matches spec (${CM_SHA:0:12}…)" + else + fail "bundle bytes invalid: size=$bundle_size cmSha=$CM_SHA specSha=$SPEC_SHA" + fi + + # §3.3: Controller convergence. Registration is disabled (--no-register), + # so the full ladder ends at Ready=True. Anchored grep — a bare + # "Ready=True" would substring-match "PaymentGateReady=True". + step "ServiceOffer $OFFER_NAME reaches Ready=True (polling, max 60x5s)" + so_ready="" + conds="" + for _ in $(seq 1 60); do + conds=$("$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{range .status.conditions[*]}{.type}={.status} {end}' 2>/dev/null || true) + if echo "$conds" | grep -qE '(^| )Ready=True'; then + so_ready="yes" + break + fi + sleep 5 + done + if [ -n "$so_ready" ]; then + pass "ServiceOffer Ready (conditions: $conds)" + else + fail "ServiceOffer not Ready within 300s — conditions: ${conds:-unreadable}" + "$OBOL" kubectl get serviceoffer "$OFFER_NAME" -n "$OFFER_NS" \ + -o jsonpath='{range .status.conditions[*]}{.type}: {.reason} — {.message}{"\n"}{end}' 2>/dev/null || true + fi + + # §3.4: Controller-rendered bundle server children exist + step "bundle server so-$OFFER_NAME-bundle rendered" + bundle_deploy=$("$OBOL" kubectl get deploy "so-$OFFER_NAME-bundle" -n "$OFFER_NS" \ + -o jsonpath='{.metadata.name}' 2>/dev/null || true) + if [ "$bundle_deploy" = 
"so-$OFFER_NAME-bundle" ]; then + pass "Deployment so-$OFFER_NAME-bundle exists" + else + fail "bundle server Deployment missing in $OFFER_NS" + fi + + # §4: Unpaid probe → 402 carries extra.skill with the pinned hash. + # Retry loop per pitfall 14 (first-request race on a fresh verifier route). + refresh_obol_ingress_env + BASE_URL="${OBOL_INGRESS_URL%/}" + if [[ "$BASE_URL" == *"obol.stack"* ]]; then + CURL_BASE="$CURL_OBOL" + else + CURL_BASE="curl" + fi + + step "402 response carries extra.skill {name,version,sha256} (polling, max 12x5s)" + EXTRA_SHA="" + body_402="" + for _ in $(seq 1 12); do + body_402=$($CURL_BASE -s --max-time 10 \ + "$BASE_URL/services/$OFFER_NAME/bundle.tar.gz" 2>&1) || true + EXTRA_SHA=$(echo "$body_402" | python3 -c " +import json, re, sys +d = json.load(sys.stdin) +skill = (d['accepts'][0].get('extra') or {}).get('skill') or {} +assert skill.get('name'), 'skill.name missing' +assert skill.get('version') == '$SKILL_VERSION', 'skill.version mismatch' +assert re.fullmatch(r'[a-f0-9]{64}', skill.get('sha256', '')), 'skill.sha256 malformed' +print(skill['sha256']) +" 2>/dev/null) || EXTRA_SHA="" + [ -n "$EXTRA_SHA" ] && break + sleep 5 + done + if [ -n "$EXTRA_SHA" ]; then + pass "402 extra.skill present (sha256 ${EXTRA_SHA:0:12}…)" + else + fail "402 missing/invalid extra.skill — ${body_402:0:300}" + fi + + step "402-advertised sha256 equals served bundle bytes hash" + if [ -n "$EXTRA_SHA" ] && [ "$EXTRA_SHA" = "$CM_SHA" ]; then + pass "pre-purchase integrity holds: extra.skill.sha256 == sha256(bundle bytes)" + else + fail "hash mismatch: 402=$EXTRA_SHA bundle=$CM_SHA" + fi + + # §4.1: Artifact well-formedness — gzipped tar with top-level SKILL.md. + # A bundle without SKILL.md is not a skill. + step "bundle is a gzipped tar containing top-level SKILL.md" + tar_list=$(tar -tzf "$BUNDLE_FILE" 2>&1 || true) + if echo "$tar_list" | grep -qx "SKILL.md"; then + pass "SKILL.md present ($(echo "$tar_list" | grep -c . 
) entries)" + else + fail "top-level SKILL.md missing from bundle — entries: $(echo "$tar_list" | head -5 | tr '\n' ' ')" + fi + + # §5: One-shot paid fetch via buy-x402 buy.py pay (gated — needs a funded + # agent wallet on $FLOW19_CHAIN; spends one auth = $PRICE). + # + # Target /skill.json (text JSON): buy.py pay prints the response body via + # decode(errors="replace") and is not binary-safe, so the gzip artifact is + # byte-verified from the ConfigMap above; the paid request proves the + # 402 → sign → X-PAYMENT → 200 loop on the same gated route. + step "one-shot paid fetch via buy.py pay (FLOW19_PAID_FETCH gate)" + if [ "${FLOW19_PAID_FETCH:-false}" != "true" ]; then + skip "FLOW19_PAID_FETCH != true — paid fetch not attempted" + elif ! "$OBOL" kubectl get deploy hermes -n hermes-obol-agent >/dev/null 2>&1; then + skip "hermes agent deployment not found — paid fetch needs the default agent" + else + pay_out=$("$OBOL" kubectl exec -n hermes-obol-agent deploy/hermes -c hermes -- \ + python3 /data/.hermes/obol-skills/buy-x402/scripts/buy.py pay \ + "http://traefik.traefik.svc.cluster.local/services/$OFFER_NAME/skill.json" \ + --timeout 60 2>&1) || true + if echo "$pay_out" | grep -q "HTTP 200" \ + && echo "$pay_out" | grep -q "$CM_SHA"; then + pass "paid fetch returned 200 with the pinned sha256 in skill.json" + else + fail "paid fetch failed — ${pay_out:0:400}" + fi + fi +fi + +# §6: ERC-8004 calldata derivation (no cluster, no chain, no signing). +# set-hash pins sha256(bundle) under metadata key skill.sha256:@; +# feedback scores the skill with tag1=asr:skill. Both print calldata for the +# OPERATOR to submit with their own wallet — assert non-empty deterministic hex. +if [ "$HAVE_SKILLS_CMD" = "1" ]; then + SKILL_REF="$SKILL_NAME@$SKILL_VERSION" + + # Without a cluster run there is no ConfigMap bundle — pack the embedded + # skill dir from the repo so set-hash has bytes to hash. 
Determinism of + # the calldata is asserted by running each command twice on the same + # input, not by reproducing the canonical Go packer here. + if [ -z "$BUNDLE_FILE" ] || [ ! -s "$BUNDLE_FILE" ]; then + step "pack local fallback bundle for calldata derivation" + BUNDLE_FILE="$(mktemp -t flow19-bundle-XXXXXX).tar.gz" + if python3 - "$OBOL_ROOT/internal/embed/skills/$SKILL_NAME" "$BUNDLE_FILE" <<'PY' +import gzip, io, os, sys, tarfile + +src, out = sys.argv[1], sys.argv[2] +if not os.path.isfile(os.path.join(src, "SKILL.md")): + raise SystemExit(f"not a skill dir (no SKILL.md): {src}") +paths = [] +for root, dirs, files in os.walk(src): + dirs[:] = sorted(d for d in dirs if d != "__pycache__") + for name in sorted(dirs + files): + p = os.path.join(root, name) + if os.path.islink(p): + raise SystemExit(f"symlink not allowed: {p}") + if name.endswith(".pyc"): + continue + paths.append(p) +paths.sort(key=lambda p: os.path.relpath(p, src).replace(os.sep, "/")) +buf = io.BytesIO() +with tarfile.open(fileobj=buf, mode="w", format=tarfile.USTAR_FORMAT) as tf: + for p in paths: + rel = os.path.relpath(p, src).replace(os.sep, "/") + if os.path.isdir(p): + info = tarfile.TarInfo(rel + "/") + info.type, info.mode = tarfile.DIRTYPE, 0o755 + info.mtime = info.uid = info.gid = 0 + info.uname = info.gname = "" + tf.addfile(info) + else: + with open(p, "rb") as f: + data = f.read() + info = tarfile.TarInfo(rel) + info.size = len(data) + info.mode = 0o755 if os.stat(p).st_mode & 0o111 else 0o644 + info.mtime = info.uid = info.gid = 0 + info.uname = info.gname = "" + tf.addfile(info, io.BytesIO(data)) +# filename="" — a named fileobj would leak the output path into the gzip +# FNAME header and break run-to-run determinism. 
+with open(out, "wb") as f: + with gzip.GzipFile(filename="", fileobj=f, mode="wb", compresslevel=9, mtime=0) as gz: + gz.write(buf.getvalue()) +PY + then + pass "fallback bundle packed ($(wc -c < "$BUNDLE_FILE" | tr -d ' ') bytes)" + else + fail "could not pack fallback bundle from internal/embed/skills/$SKILL_NAME" + fi + fi + + step "obol skills calldata set-hash prints deterministic calldata" + sethash_1=$("$OBOL" skills calldata set-hash \ + --agent-id "$FEEDBACK_AGENT_ID" \ + --chain "$FLOW19_CHAIN" \ + --skill "$SKILL_REF" \ + --bundle "$BUNDLE_FILE" 2>&1) || true + sethash_2=$("$OBOL" skills calldata set-hash \ + --agent-id "$FEEDBACK_AGENT_ID" \ + --chain "$FLOW19_CHAIN" \ + --skill "$SKILL_REF" \ + --bundle "$BUNDLE_FILE" 2>&1) || true + sethash_hex=$(echo "$sethash_1" | grep -oE 'Calldata: 0x[0-9a-fA-F]+' | head -1 | awk '{print $2}') + if [ -n "$sethash_hex" ] && [ ${#sethash_hex} -gt 10 ] \ + && [ "$sethash_1" = "$sethash_2" ] \ + && echo "$sethash_1" | grep -qE 'IdentityRegistry.*0x[0-9a-fA-F]{40}'; then + pass "set-hash calldata deterministic (${#sethash_hex} hex chars)" + else + fail "set-hash calldata missing or non-deterministic — ${sethash_1:0:300}" + fi + + step "obol skills calldata feedback (tag1=asr:skill) prints deterministic calldata" + fb_1=$("$OBOL" skills calldata feedback \ + --agent-id "$FEEDBACK_AGENT_ID" \ + --skill "$SKILL_REF" \ + --value "$FEEDBACK_VALUE" \ + --chain "$FLOW19_CHAIN" 2>&1) || true + fb_2=$("$OBOL" skills calldata feedback \ + --agent-id "$FEEDBACK_AGENT_ID" \ + --skill "$SKILL_REF" \ + --value "$FEEDBACK_VALUE" \ + --chain "$FLOW19_CHAIN" 2>&1) || true + fb_hex=$(echo "$fb_1" | grep -oE 'Calldata: 0x[0-9a-fA-F]+' | head -1 | awk '{print $2}') + if [ -n "$fb_hex" ] && [ ${#fb_hex} -gt 10 ] \ + && [ "$fb_1" = "$fb_2" ] \ + && echo "$fb_1" | grep -qE 'ReputationRegistry.*0x[0-9a-fA-F]{40}'; then + pass "feedback calldata deterministic (${#fb_hex} hex chars)" + else + fail "feedback calldata missing or 
non-deterministic — ${fb_1:0:300}" + fi + + step "set-hash and feedback calldata differ (distinct selectors/payloads)" + if [ -n "$sethash_hex" ] && [ -n "$fb_hex" ] && [ "$sethash_hex" != "$fb_hex" ]; then + pass "calldata payloads are distinct" + else + fail "calldata sanity failed: set-hash=$sethash_hex feedback=$fb_hex" + fi +fi + +# §7: Cleanup — opt-in so a successful run leaves the offer for inspection. +if [ "${FLOW_CLEANUP:-0}" = "1" ] && [ "$CLUSTER_OK" = "1" ] && [ "$HAVE_SELL_SKILL" = "1" ]; then + "$OBOL" sell delete "$OFFER_NAME" -n "$OFFER_NS" >/dev/null 2>&1 || true + [ -n "${BUNDLE_CM:-}" ] && "$OBOL" kubectl delete configmap "$BUNDLE_CM" -n "$OFFER_NS" \ + --ignore-not-found >/dev/null 2>&1 || true +fi +[ -n "$BUNDLE_FILE" ] && rm -f "$BUNDLE_FILE" 2>/dev/null || true + +emit_metrics diff --git a/flows/flow-20-smoke-agent.sh b/flows/flow-20-smoke-agent.sh new file mode 100755 index 00000000..6dd2d888 --- /dev/null +++ b/flows/flow-20-smoke-agent.sh @@ -0,0 +1,367 @@ +#!/bin/bash +# Flow 20: Smoke-test agent — sellable read-only prober for Obol Stack +# public surfaces (skill: internal/embed/skills/smoke-test). +# +# Coverage: +# §1 Host-side syntax gate — python3 -m py_compile of the embedded +# smoke-test skill scripts (smoke.py + gh_post.py). +# §2 Self-smoke (cluster-gated) — run smoke.py probe against THIS +# stack's public catalog surface through the Traefik ingress; +# assert report.md + results.json are well-formed, score255/score100 +# are in range and internally consistent, and reportSha256 matches +# the exact bytes of report.md on disk. +# §3 GitHub posting (env-gated) — ONLY when GITHUB_TOKEN and +# GITHUB_REPORT_REPO are both set; posts the §2 report and asserts a +# commit-pinned permalink. Explicit SKIP otherwise so CI never needs +# GitHub credentials. 
+#   §4  Verdict calldata — build obol, run `obol smoke calldata` with
+#       fixed inputs, assert non-empty calldata carrying the
+#       validationResponse(bytes32,uint8,string,bytes32,string) selector
+#       0x3d659a96, and that target normalization (trailing slash) does
+#       not change the derived request hash.
+#
+# The probe path is strictly read-only: GET-only requests against the
+# published public routes (/skill.md, /api/services.json, /services/*,
+# /.well-known/agent-registration.json). No X-PAYMENT header is ever sent
+# and nothing in the cluster is mutated.
+#
+# scrub_secrets-safe: GITHUB_TOKEN is read from the environment only —
+# never echoed, never placed in argv — and every captured output that
+# could embed it is redacted before printing.
+#
+# Env overrides:
+#   FLOW20_TARGET       probe target base URL (default: this stack's
+#                       ingress with obol.stack rewritten to 127.0.0.1 so
+#                       python3/urllib needs no special DNS resolution)
+#   GITHUB_TOKEN        fine-grained PAT for the seller-owned report repo
+#   GITHUB_REPORT_REPO  <owner>/<repo> public report repository
+source "$(dirname "$0")/lib.sh"
+
+require_tool python3
+
+SKILL_SCRIPTS_DIR="$OBOL_ROOT/internal/embed/skills/smoke-test/scripts"
+SMOKE_PY="$SKILL_SCRIPTS_DIR/smoke.py"
+GH_POST_PY="$SKILL_SCRIPTS_DIR/gh_post.py"
+
+FLOW_STATE_DIR="$OBOL_ROOT/.workspace/state/flows"
+RUN_ROOT="$FLOW_STATE_DIR/flow20-$(date +%Y%m%d-%H%M%S)-$$"
+mkdir -p "$RUN_ROOT"
+
+# Redact the GitHub token from any captured output before it is printed.
+# scrub_secrets (lib.sh) does not know about GitHub PATs, so this flow
+# guarantees the token never reaches stdout/stderr on its own.
+#
+# Usage:  redacted=$(redact_gh_token "$captured_output")
+#   $1      text that may embed $GITHUB_TOKEN
+#   stdout  $1 with every occurrence of the token replaced by
+#           [REDACTED-GH-TOKEN]; unchanged when GITHUB_TOKEN is unset/empty.
+#           Written with printf '%s', so no trailing newline is added.
+redact_gh_token() {
+  local text="$1"
+  if [ -n "${GITHUB_TOKEN:-}" ]; then
+    # The inner quotes matter: an unquoted pattern is glob-matched, so a
+    # token containing * ? [ or \ could over-redact or fail to match itself.
+    text="${text//"${GITHUB_TOKEN}"/[REDACTED-GH-TOKEN]}"
+  fi
+  printf '%s' "$text"
+}
+
+# py_compile with an explicit cfile in the flow workspace — the default
+# cfile would drop a __pycache__/ dir inside internal/embed/skills/,
+# polluting the repo checkout the flow runs from.
+py_compile_check() { + python3 -c 'import py_compile, sys; py_compile.compile(sys.argv[1], cfile=sys.argv[2], doraise=True)' \ + "$1" "$RUN_ROOT/$(basename "$1").pyc" +} + +# §1: Host-side compile gate — the skill scripts must at least be valid +# python3 before anything ships them into an agent PVC. +step "smoke.py compiles (python3 -m py_compile)" +if [ ! -f "$SMOKE_PY" ]; then + fail "smoke-test skill script missing: $SMOKE_PY" +else + compile_out="" + if compile_out=$(py_compile_check "$SMOKE_PY" 2>&1); then + pass "smoke.py compiles" + else + fail "smoke.py failed py_compile — ${compile_out:0:200}" + fi +fi + +step "gh_post.py compiles (python3 -m py_compile)" +if [ ! -f "$GH_POST_PY" ]; then + # Tolerated layout difference: posting may live in `smoke.py post` + # instead of a dedicated gh_post.py. §3 falls back accordingly. + skip "gh_post.py not present at $GH_POST_PY — assuming posting lives in 'smoke.py post'" +else + compile_out="" + if compile_out=$(py_compile_check "$GH_POST_PY" 2>&1); then + pass "gh_post.py compiles" + else + fail "gh_post.py failed py_compile — ${compile_out:0:200}" + fi +fi + +# §2: Self-smoke against this stack's public catalog surface. +# Cluster-gated: every step here SKIPs cleanly when no local stack is up. +CLUSTER_UP="" +if [ -x "$OBOL" ] \ + && [ -f "$OBOL_CONFIG_DIR/.stack-id" ] \ + && [ -f "$OBOL_CONFIG_DIR/kubeconfig.yaml" ] \ + && "$OBOL" kubectl cluster-info >/dev/null 2>&1; then + CLUSTER_UP="1" +fi + +TARGET="" +SELF_SMOKE_READY="" +step "Public catalog surface reachable (GET /api/services.json)" +if [ -z "$CLUSTER_UP" ]; then + skip "no local stack (config/kubeconfig/cluster unreachable) — self-smoke steps skipped" +else + refresh_obol_ingress_env + # python3/urllib cannot use curl's --resolve, so default the probe + # target to the loopback form of the ingress. The catalog routes have + # no hostname restriction (public by design), so Host: 127.0.0.1 is + # routed identically to obol.stack. 
+ TARGET="${FLOW20_TARGET:-${OBOL_INGRESS_URL/obol.stack/127.0.0.1}}" + TARGET="${TARGET%/}" + catalog_code="" + # Small retry: the controller-served catalog can lag right after the + # route is wired (same first-request race as flows 07/08). + for _ in 1 2 3 4 5 6; do + catalog_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 \ + "$TARGET/api/services.json" 2>/dev/null) || true + [ "$catalog_code" = "200" ] && break + sleep 5 + done + if [ "$catalog_code" = "200" ]; then + pass "catalog surface up at $TARGET/api/services.json" + SELF_SMOKE_READY="1" + else + fail "catalog surface not reachable at $TARGET/api/services.json (HTTP ${catalog_code:-none})" + fi +fi + +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-flow20" +RUN_DIR="" +step "smoke.py probe ${TARGET:-} (run id $RUN_ID)" +if [ -z "$SELF_SMOKE_READY" ]; then + skip "self-smoke target unavailable — probe skipped" +elif [ ! -f "$SMOKE_PY" ]; then + skip "smoke.py missing — probe skipped (§1 already failed)" +else + probe_rc=0 + # Worst case per the skill budget: 8 probes x 8s + report writes; 180s + # is comfortably above that without masking a hang. -B because smoke.py + # imports gh_post — without it python drops a __pycache__/ dir into + # internal/embed/skills/smoke-test/scripts/ in this checkout. + probe_out=$(cd "$RUN_ROOT" && run_with_timeout 180 \ + python3 -B "$SMOKE_PY" probe "$TARGET" --run-id "$RUN_ID" 2>&1) || probe_rc=$? 
+ if [ "$probe_rc" -eq 0 ]; then + RUN_DIR=$(find "$RUN_ROOT/smoke" -type d -name "$RUN_ID" 2>/dev/null | head -1 || true) + if [ -n "$RUN_DIR" ] && [ -f "$RUN_DIR/results.json" ] && [ -f "$RUN_DIR/report.md" ]; then + pass "probe wrote $RUN_DIR/{report.md,results.json}" + else + RUN_DIR="" + fail "probe exited 0 but run dir/artifacts not found under $RUN_ROOT/smoke — ${probe_out:0:200}" + fi + else + fail "smoke.py probe exited $probe_rc — ${probe_out:0:300}" + fi +fi + +step "results.json + report.md well-formed (scores in range, reportSha256 matches file)" +if [ -z "$RUN_DIR" ]; then + skip "no probe artifacts — validation skipped" +else + validate_rc=0 + validate_out=$(python3 - "$RUN_DIR" "$RUN_ID" <<'PY' 2>&1 +import hashlib +import json +import re +import sys + +run_dir, run_id = sys.argv[1], sys.argv[2] + +with open(run_dir + "/results.json", encoding="utf-8") as fh: + results = json.load(fh) +with open(run_dir + "/report.md", "rb") as fh: + report = fh.read() + +assert report.startswith(b"# Obol Stack Smoke Report"), "report.md missing canonical header" + +assert results.get("version") == "obol/smoke-test/v1", f"version={results.get('version')!r}" +assert results.get("runId") == run_id, f"runId={results.get('runId')!r} want {run_id!r}" +assert isinstance(results.get("target"), str) and results["target"], "target missing" +assert not results["target"].endswith("/"), "target not normalized (trailing slash)" + +checks = results.get("checks") +assert isinstance(checks, list) and checks, "checks missing/empty" +for c in checks: + assert isinstance(c, dict) and c.get("name"), "check entry malformed" + assert isinstance(c.get("ok"), bool), f"check {c.get('name')}: ok not bool" + assert isinstance(c.get("ms"), (int, float)) and c["ms"] >= 0, f"check {c.get('name')}: ms invalid" + +names = {c["name"] for c in checks} +assert "skill-md" in names, "skill-md check missing" +assert "services-json" in names, "services-json check missing" + +counted = [c for c in checks 
if not c.get("informational")] +passed, total = results.get("passed"), results.get("total") +assert total == len(counted), f"total={total} != counted checks {len(counted)}" +assert total >= 2, f"total={total} < 2 (skill-md + services-json are always counted)" +assert passed == sum(1 for c in counted if c["ok"]), "passed != recount of ok counted checks" +assert 0 <= passed <= total, f"passed={passed} out of range" + +score255, score100 = results.get("score255"), results.get("score100") +assert score255 == (255 * passed) // total, f"score255={score255} != floor(255*{passed}/{total})" +assert 0 <= score255 <= 255, f"score255={score255} out of range" +assert score100 == (100 * passed) // total, f"score100={score100} != floor(100*{passed}/{total})" +assert 0 <= score100 <= 100, f"score100={score100} out of range" + +sha = results.get("reportSha256", "") +assert re.fullmatch(r"[0-9a-f]{64}", sha or ""), f"reportSha256 not 64 lowercase hex: {sha!r}" +assert sha == hashlib.sha256(report).hexdigest(), "reportSha256 does not match report.md bytes" + +# Probe-only run: permalink stays empty until a post succeeds. +assert results.get("permalink", "") == "", "permalink non-empty before post" + +print(f"OK passed={passed} total={total} score255={score255} score100={score100} sha256={sha[:12]}…") +PY + ) || validate_rc=$? + if [ "$validate_rc" -eq 0 ]; then + pass "artifacts valid — $validate_out" + else + fail "artifact validation failed — ${validate_out:0:300}" + fi +fi + +# §3: GitHub posting — strictly env-gated. CI runs probe-only and must see +# an explicit SKIP here, never an attempted network write. 
+step "Post report to GitHub (gh_post)" +if [ -z "${GITHUB_TOKEN:-}" ] || [ -z "${GITHUB_REPORT_REPO:-}" ]; then + skip "GITHUB_TOKEN / GITHUB_REPORT_REPO not set — GitHub posting step skipped (probe-only mode)" +elif [ -z "$RUN_DIR" ]; then + skip "no successful probe run dir — nothing to post" +else + post_rc=0 + if [ -f "$GH_POST_PY" ]; then + post_out=$(cd "$RUN_ROOT" && run_with_timeout 60 \ + python3 -B "$GH_POST_PY" "$RUN_DIR" 2>&1) || post_rc=$? + else + post_out=$(cd "$RUN_ROOT" && run_with_timeout 60 \ + python3 -B "$SMOKE_PY" post "$RUN_DIR" 2>&1) || post_rc=$? + fi + post_out=$(redact_gh_token "$post_out") + if [ "$post_rc" -ne 0 ]; then + fail "GitHub post exited $post_rc — ${post_out:0:300}" + else + permalink_rc=0 + permalink_out=$(python3 - "$RUN_DIR" <<'PY' 2>&1 +import json +import re +import sys + +with open(sys.argv[1] + "/results.json", encoding="utf-8") as fh: + results = json.load(fh) + +permalink = results.get("permalink", "") +# Commit-pinned blob URL — never a branch-floating html_url. +assert re.match(r"^https://github\.com/[^/]+/[^/]+/blob/[0-9a-f]{7,40}/", permalink or ""), \ + f"permalink not a commit-pinned GitHub blob URL: {permalink!r}" +print(f"OK permalink={permalink}") +PY + ) || permalink_rc=$? + if [ "$permalink_rc" -eq 0 ]; then + pass "report posted — $permalink_out" + else + fail "post succeeded but permalink invalid — ${permalink_out:0:300}" + fi + fi +fi + +# §4: Verdict calldata derivation — `obol smoke calldata` must emit +# validationResponse(bytes32,uint8,string,bytes32,string) calldata +# (selector 0x3d659a96) for the operator to submit with their own wallet. +CALLDATA_OBOL="" +step "Build obol for calldata derivation" +if ! command -v go >/dev/null 2>&1; then + skip "go toolchain not on PATH — calldata steps skipped" +else + build_rc=0 + build_out=$(cd "$OBOL_ROOT" && run_with_timeout 600 \ + go build -o "$RUN_ROOT/obol" ./cmd/obol 2>&1) || build_rc=$? 
+ if [ "$build_rc" -eq 0 ] && [ -x "$RUN_ROOT/obol" ]; then + CALLDATA_OBOL="$RUN_ROOT/obol" + pass "obol built at $RUN_ROOT/obol" + else + fail "go build ./cmd/obol failed — ${build_out:0:300}" + fi +fi + +# Fixed inputs — deterministic across runs so the request-hash stability +# assertion below is meaningful. The response hash is the sha256 of the +# empty string (a recognizable, obviously-synthetic 32-byte value). +FIXED_TARGET="http://obol.stack:8080" +FIXED_RUN_ID="20260101T000000Z-cafe01" +FIXED_RESPONSE_HASH="0xe3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" +FIXED_RESPONSE_URI="https://github.com/example-org/example-reports/blob/0000000000000000000000000000000000000000/reports/obol.stack-8080/${FIXED_RUN_ID}.md" + +step "obol smoke calldata emits selector 0x3d659a96" +if [ -z "$CALLDATA_OBOL" ]; then + skip "obol binary unavailable — calldata derivation skipped" +else + calldata_rc=0 + # Trailing slash on --target on purpose: the request hash must be + # derived from the NORMALIZED target (trailing slash stripped). + calldata_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET/" \ + --run-id "$FIXED_RUN_ID" \ + --response 100 \ + --response-uri "$FIXED_RESPONSE_URI" \ + --response-hash "$FIXED_RESPONSE_HASH" \ + --network base-sepolia 2>&1) || calldata_rc=$? + request_hash=$(echo "$calldata_out" | grep -oE 'Request hash: 0x[0-9a-fA-F]{64}' | head -1 | awk '{print $3}' || true) + if [ "$calldata_rc" -ne 0 ]; then + fail "obol smoke calldata exited $calldata_rc — ${calldata_out:0:300}" + elif [ -z "$request_hash" ] || [ "$request_hash" = "0x0000000000000000000000000000000000000000000000000000000000000000" ]; then + fail "request hash missing or zero — ${calldata_out:0:300}" + elif ! 
echo "$calldata_out" | grep -qE 'ValidationRegistry \(base-sepolia\): 0x[0-9a-fA-F]{40}'; then + fail "ValidationRegistry address line missing — ${calldata_out:0:300}" + elif echo "$calldata_out" | grep -qE 'Calldata: 0x3d659a96[0-9a-fA-F]+'; then + pass "calldata carries validationResponse selector 0x3d659a96 (request hash $request_hash)" + else + fail "calldata missing or wrong selector (want 0x3d659a96) — ${calldata_out:0:300}" + fi +fi + +step "Request hash stable under target normalization (trailing slash)" +if [ -z "$CALLDATA_OBOL" ] || [ -z "${request_hash:-}" ]; then + skip "no baseline request hash — normalization check skipped" +else + norm_rc=0 + norm_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET" \ + --run-id "$FIXED_RUN_ID" \ + --response 100 \ + --response-uri "$FIXED_RESPONSE_URI" \ + --response-hash "$FIXED_RESPONSE_HASH" \ + --network base-sepolia 2>&1) || norm_rc=$? + norm_hash=$(echo "$norm_out" | grep -oE 'Request hash: 0x[0-9a-fA-F]{64}' | head -1 | awk '{print $3}' || true) + if [ "$norm_rc" -eq 0 ] && [ -n "$norm_hash" ] && [ "$norm_hash" = "$request_hash" ]; then + pass "trailing-slash and bare target derive the same request hash" + else + fail "request hash drifted under normalization: with-slash=$request_hash bare=${norm_hash:-none} (exit $norm_rc)" + fi +fi + +step "obol smoke calldata rejects --response > 100" +if [ -z "$CALLDATA_OBOL" ]; then + skip "obol binary unavailable — bounds check skipped" +else + bounds_rc=0 + bounds_out=$(run_with_timeout 30 "$CALLDATA_OBOL" smoke calldata \ + --target "$FIXED_TARGET" \ + --run-id "$FIXED_RUN_ID" \ + --response 101 \ + --network base-sepolia 2>&1) || bounds_rc=$? 
+ if [ "$bounds_rc" -ne 0 ]; then + pass "--response 101 rejected (the deployed registry reverts above 100)" + else + fail "--response 101 was accepted — ${bounds_out:0:200}" + fi +fi + +emit_metrics diff --git a/flows/hf-surface-smoke.sh b/flows/hf-surface-smoke.sh new file mode 100755 index 00000000..0dcb4e1c --- /dev/null +++ b/flows/hf-surface-smoke.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash +# HF-surface smoke — validate the "decentralised Hugging Face" surfaces the +# obol-stack ships, end to end: +# +# 1. Dataset Hub anonymize -> sign a version -> publish (gated) -> buy +# (resumable, whole-file-hash-verified download) +# 2. Inference a type=inference offer in the federated catalog +# 3. Fine-tuning run a backend over the BOUGHT dataset on a real GPU box +# (spark), producing run.manifest bound to the dataset's +# content-address (provenance) +# 4. Discovery federate the seller catalog through obol-router and +# assert both the dataset and inference offers surface +# 5. Indexer cross-check against the obol-exex ERC-8004 indexer +# +# Each surface is independent; a missing prerequisite SKIPs, never aborts. +# +# Overridable env: +# OBOL_BIN path to a built obol (default: build from this tree) +# ROUTER_BIN path to a built obol-router (default: /tmp/obol-router) +# SPARK ssh host for the fine-tune (default: spark1; "" to skip) +# INDEXER_DIR obol-exex-indexer checkout (default: sibling repo) +set -uo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +WORK="$(mktemp -d)" +trap 'jobs -p | xargs -r kill 2>/dev/null; rm -rf "$WORK"' EXIT + +OBOL_BIN="${OBOL_BIN:-$WORK/obol}" +ROUTER_BIN="${ROUTER_BIN:-/tmp/obol-router}" +SPARK="${SPARK-spark1}" +INDEXER_DIR="${INDEXER_DIR:-$ROOT/../obol-exex-indexer}" +SKILLS="$ROOT/internal/embed/skills" + +declare -a RESULTS +pass() { RESULTS+=("PASS $1"); echo " ✓ $1"; } +skip() { RESULTS+=("SKIP $1 — $2"); echo " - SKIP $1 — $2"; } +fail() { RESULTS+=("FAIL $1 — $2"); echo " ✗ FAIL $1 — $2"; } + +section() { echo; echo "=== $1 ==="; } + +# --- build obol if needed --- +if [ ! -x "$OBOL_BIN" ]; then + echo "Building obol …" + ( cd "$ROOT" && go build -o "$OBOL_BIN" ./cmd/obol ) || { echo "obol build failed"; exit 1; } +fi +export OBOL_CONFIG_DIR="$WORK/config" +mkdir -p "$OBOL_CONFIG_DIR" + +DS_ID="pi-sessions" +DS_PORT=18951 +SELLER_PORT=18961 +ROUTER_PORT=18971 + +# Free our ports from any orphan left by a prior aborted run (a subshelled +# server can outlive its parent's job-kill), so federation never reads a +# stale 404. +for p in "$DS_PORT" "$SELLER_PORT" "$ROUTER_PORT"; do + lsof -nP -iTCP:"$p" -sTCP:LISTEN -t 2>/dev/null | xargs -r kill 2>/dev/null +done + +# =========================================================================== +section "Surface 1 — Dataset Hub (anonymize → sign → publish → buy → verify)" +# =========================================================================== +BUNDLE="$WORK/bundle"; mkdir -p "$BUNDLE" +cat > "$BUNDLE/raw.jsonl" <<'EOF' +{"messages":[{"role":"user","content":"email me at alice@example.com from 10.0.0.7"},{"role":"assistant","content":"path /Users/bob/notes.txt, key sk-ABCDEF0123456789abcdef"}]} +{"messages":[{"role":"user","content":"summarize the design doc"},{"role":"assistant","content":"the stack ships a dataset hub, inference, and fine-tuning"}]} +EOF + +if python3 "$SKILLS/dataset-anonymize/scripts/anonymize.py" "$BUNDLE/raw.jsonl" "$BUNDLE/sft.jsonl" >/dev/null 2>&1 \ + && ! 
grep -q 'alice@example.com' "$BUNDLE/sft.jsonl"; then + pass "1a anonymize — PII masked, no raw leak" +else + fail "1a anonymize" "PII leaked or script error" +fi + +HASH=$(shasum -a 256 "$BUNDLE/sft.jsonl" | awk '{print $1}') +printf '{"hash":"%s","files":["sft.jsonl"]}\n' "$HASH" > "$BUNDLE/manifest.json" + +if "$OBOL_BIN" dataset from "$BUNDLE" --name "$DS_ID" >/dev/null 2>&1 \ + && "$OBOL_BIN" dataset verify "$DS_ID" 2>&1 | grep -q 'Chain valid'; then + pass "1b sign + verify — signed version chain valid" +else + fail "1b sign+verify" "version not recorded or chain invalid" +fi + +MANIFEST_HASH=$(python3 -c "import json;print(json.load(open('$OBOL_CONFIG_DIR/dataset-serve/$DS_ID.store.json'))['versions'][0]['manifestHash'])") + +"$OBOL_BIN" dataset publish "$DS_ID" --membership open --port "$DS_PORT" --no-tunnel >/dev/null 2>&1 & +curl -sf --retry 25 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$DS_PORT/healthz" >/dev/null +OWNER=$(python3 -c "import json;print(json.load(open('$OBOL_CONFIG_DIR/dataset-serve/$DS_ID.state.json'))['owner_token'])" 2>/dev/null) + +if [ -n "$OWNER" ] && "$OBOL_BIN" buy dataset "http://127.0.0.1:$DS_PORT" --id "$DS_ID" --version 1 \ + --member-token "$OWNER" --out "$WORK/bought.jsonl" >/dev/null 2>&1 \ + && diff -q "$WORK/bought.jsonl" "$BUNDLE/sft.jsonl" >/dev/null; then + pass "1c buy — resumable download byte-identical + hash-verified" +else + fail "1c buy" "download/verify mismatch" +fi + +# =========================================================================== +section "Surface 2 — Inference offer (federated catalog entry)" +# =========================================================================== +# Build a seller /api/services.json carrying BOTH a type=dataset entry (the +# real signed version above) and a type=inference entry. 
+mkdir -p "$WORK/seller/api" +DS_SIZE=$(wc -c < "$BUNDLE/sft.jsonl" | tr -d ' ') +cat > "$WORK/seller/api/services.json" </dev/null 2>&1; then + skip "3 fine-tune" "$SPARK unreachable" +else + RDIR="/tmp/obol-ft-smoke.$$" + ssh "$SPARK" "mkdir -p $RDIR/out" >/dev/null 2>&1 + scp -q "$WORK/bought.jsonl" "$SPARK:$RDIR/ds.jsonl" + scp -q "$SKILLS/finetune-backend/scripts/runner.py" "$SPARK:$RDIR/runner.py" + # mock backend: no framework needed, validates contract + provenance on real hw + if ssh "$SPARK" "cd $RDIR && python3 runner.py --backend mock --dataset ds.jsonl --base-model qwen2.5-0.5b --manifest-hash $MANIFEST_HASH --out out" >/dev/null 2>&1; then + BOUND=$(ssh "$SPARK" "python3 -c \"import json;print(json.load(open('$RDIR/out/run.manifest'))['dataset_hash'])\"" 2>/dev/null) + ARCH=$(ssh "$SPARK" 'uname -m' 2>/dev/null) + if [ "$BOUND" = "$MANIFEST_HASH" ]; then + pass "3 fine-tune on $SPARK ($ARCH) — run.manifest dataset_hash == bought manifestHash" + else + fail "3 fine-tune" "provenance mismatch ($BOUND != $MANIFEST_HASH)" + fi + ssh "$SPARK" "rm -rf $RDIR" >/dev/null 2>&1 + else + skip "3 fine-tune" "runner failed on $SPARK" + fi +fi + +# =========================================================================== +section "Surface 4 — Discovery via obol-router (federation)" +# =========================================================================== +if [ ! -x "$ROUTER_BIN" ]; then + skip "4 router discovery" "obol-router not built at $ROUTER_BIN" +else + ( cd "$WORK/seller" && exec python3 -m http.server "$SELLER_PORT" >/dev/null 2>&1 ) & + curl -sf --retry 15 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$SELLER_PORT/api/services.json" >/dev/null + # The router federates members' /api/services.json and serves the merge at + # GET /api/services.json. PAY_TO/FACILITATOR/BUYER_KEY are required for its + # x402 routing path but unused by this discovery-only check (dummy values). 
+ OBOL_ROUTER_MEMBERS="seller1=http://127.0.0.1:$SELLER_PORT" \ + PORT="$ROUTER_PORT" \ + ROUTER_CHAIN="base-sepolia" \ + ROUTER_PAY_TO="0x1111111111111111111111111111111111111111" \ + ROUTER_FACILITATOR_URL="http://127.0.0.1:1" \ + ROUTER_BUYER_KEY_HEX="ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" \ + "$ROUTER_BIN" >"$WORK/router.log" 2>&1 & + curl -sf --retry 20 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$ROUTER_PORT/healthz" >/dev/null 2>&1 + MERGED=$(curl -sf "http://127.0.0.1:$ROUTER_PORT/api/services.json" 2>/dev/null) + if echo "$MERGED" | python3 -c "import json,sys;d=json.load(sys.stdin);t=[e.get('type') for e in (d if isinstance(d,list) else d.get('services',d.get('offers',[])))];assert 'dataset' in t and 'inference' in t,t" 2>/dev/null; then + pass "4 router federated the dataset AND inference offers" + else + fail "4 router discovery" "merged catalog missing dataset/inference (got: $(echo "$MERGED" | head -c 120))" + fi +fi + +# =========================================================================== +section "Surface 5 — Cross-check via the obol-exex ERC-8004 indexer" +# =========================================================================== +if [ ! -d "$INDEXER_DIR" ]; then + skip "5 indexer" "obol-exex-indexer not found at $INDEXER_DIR" +elif ! 
command -v cargo >/dev/null 2>&1; then + skip "5 indexer" "cargo not installed" +else + if ( cd "$INDEXER_DIR" && cargo test -p indexer-core --quiet ) >/dev/null 2>&1; then + pass "5 indexer-core tests green (ERC-8004 registration parsing / feeds parity)" + else + fail "5 indexer" "indexer-core tests failed" + fi +fi + +# =========================================================================== +section "Summary" +# =========================================================================== +printf '%s\n' "${RESULTS[@]}" +FAILS=$(printf '%s\n' "${RESULTS[@]}" | grep -c '^FAIL' || true) +echo +if [ "$FAILS" -eq 0 ]; then + echo "HF-surface smoke: no failures ✓" + exit 0 +else + echo "HF-surface smoke: $FAILS failure(s) ✗" + exit 1 +fi diff --git a/go.mod b/go.mod index eb8589f5..2c9f2014 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,8 @@ require ( github.com/charmbracelet/lipgloss v1.1.0 github.com/cucumber/godog v0.15.1 github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 + github.com/drand/kyber v1.3.2 + github.com/drand/kyber-bls12381 v0.3.4 github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 github.com/ethereum/go-ethereum v1.16.7 github.com/google/go-sev-guest v0.14.1 @@ -40,7 +42,7 @@ require ( github.com/StackExchange/wmi v1.2.1 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.24.2 // indirect + github.com/bits-and-blooms/bitset v1.24.4 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect github.com/charmbracelet/x/ansi v0.9.3 // indirect @@ -81,6 +83,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/kilic/bls12-381 v0.1.0 // indirect github.com/klauspost/compress v1.18.1 // indirect github.com/lucasb-eyer/go-colorful 
v1.2.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect @@ -93,7 +96,6 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/rivo/uniseg v0.4.7 // indirect - github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect github.com/spf13/cobra v1.9.1 // indirect diff --git a/go.sum b/go.sum index 60ea8dfb..f3995e12 100644 --- a/go.sum +++ b/go.sum @@ -12,8 +12,8 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiE github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD3vOaFxp0= -github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.24.4 h1:95H15Og1clikBrKr/DuzMXkQzECs1M6hhoGXLwLQOZE= +github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/cespare/cp v0.1.0 h1:SE+dxFebS7Iik5LK0tsi1k9ZCxEaFX4AjQmoyA+1dJk= github.com/cespare/cp v0.1.0/go.mod h1:SOGHArjBr4JWaSDEVpWpo/hNg6RoKrls6Oh40hiwW+s= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -74,6 +74,10 @@ github.com/deepmap/oapi-codegen v1.6.0 h1:w/d1ntwh91XI0b/8ja7+u5SvA4IFfM0UNNLmiD github.com/deepmap/oapi-codegen v1.6.0/go.mod h1:ryDa9AgbELGeB+YEXE1dR53yAjHwFvE9iAUlWl9Al3M= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/drand/kyber v1.3.2 
h1:Cf3NNcb5bV3eODopr3XVHzImjDK40GiObhFUFG93Zeo= +github.com/drand/kyber v1.3.2/go.mod h1:ciDFWoC7ajb89niGJnS4C1Xeo4lSJMmbi+km5w8juAI= +github.com/drand/kyber-bls12381 v0.3.4 h1:rrmYcRcXmtOAvKWVBxRQxi22qNMVcS2Jz7MAebZQJxI= +github.com/drand/kyber-bls12381 v0.3.4/go.mod h1:jh3IGIAQfdLrdNKYz1HWZ3YdfJM0DWlN1TxXkh60utk= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 h1:aYo8nnk3ojoQkP5iErif5Xxv0Mo0Ga/FR5+ffl/7+Nk= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0/go.mod h1:8AuBTZBRSFqEYBPYULd+NN474/zZBLP+6WeT5S9xlAc= github.com/emicklei/dot v1.6.2 h1:08GN+DD79cy/tzN6uLCT84+2Wk9u+wvqP+Hkx/dIR8A= @@ -195,6 +199,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kilic/bls12-381 v0.1.0 h1:encrdjqKMEvabVQ7qYOKu1OvhqpK4s47wDYtNiPtlp4= +github.com/kilic/bls12-381 v0.1.0/go.mod h1:vDTTHJONJ6G+P2R74EhnyotQDTliQDnFEwhdmfzw1ig= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= @@ -342,6 +348,8 @@ github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zI github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.dedis.ch/fixbuf v1.0.3 h1:hGcV9Cd/znUxlusJ64eAlExS+5cJDIyTyEG+otu5wQs= +go.dedis.ch/fixbuf v1.0.3/go.mod h1:yzJMt34Wa5xD37V5RTdmp38cz3QhMagdGoem9anUalw= 
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -380,6 +388,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/bounty/decay.go b/internal/bounty/decay.go new file mode 100644 index 00000000..2da487c1 --- /dev/null +++ b/internal/bounty/decay.go @@ -0,0 +1,67 @@ +package bounty + +// Reputation decay (design doc §11.4): ladder weight earned by an evaluator +// halves every decayHalfLife of inactivity past lastEvalAt. These are PURE +// read-time functions — nothing here mutates ladder status. Stored records +// keep their raw counters; decay is applied only where reputation is READ +// (selection weights and tier gating), so an evaluator who returns from a +// long idle resumes from their stored counters, just with less pull until +// they participate again. + +import ( + "math" + "time" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" +) + +// defaultDecayHalfLife mirrors applyLadderDefaults (registry.go) for callers +// holding a zero/unparseable Ladder. 
+const defaultDecayHalfLife = 720 * time.Hour + +// DecayHalfLifeDuration parses the ladder's decayHalfLife knob, falling back +// to the registry default (720h) when it is missing or unparseable. +func (l Ladder) DecayHalfLifeDuration() time.Duration { + if d, err := time.ParseDuration(l.DecayHalfLife); err == nil && d > 0 { + return d + } + return defaultDecayHalfLife +} + +// EffectiveCompleted is the decayed completion count: +// +// completed × 2^(−idle/halfLife) +// +// where idle = now − lastEvalAt. A nil lastEvalAt is a legacy record from +// before decay landed — there is no anchor to decay from, so it is taken at +// face value. +func EffectiveCompleted(completed int, lastEvalAt *time.Time, now time.Time, halfLife time.Duration) float64 { + if lastEvalAt == nil || halfLife <= 0 { + return float64(completed) + } + idle := now.Sub(*lastEvalAt) + if idle <= 0 { + return float64(completed) + } + return float64(completed) * math.Exp2(-float64(idle)/float64(halfLife)) +} + +// EffectiveTier is the read-time tier gate: a stored "Full" record whose +// decayed completion count has fallen below the task's probation threshold +// AND whose idle time exceeds the half-life is treated as Probation for +// selection purposes — stale reputation buys a discounted seat, not a full +// one. Every other case returns the stored tier unchanged (legacy records +// with no lastEvalAt anchor are never demoted). 
+func EffectiveTier(record monetizeapi.EvaluatorLadderRecord, ladder Ladder, now time.Time) string { + if record.Tier != monetizeapi.EvaluatorTierFull || record.LastEvalAt == nil { + return record.Tier + } + halfLife := ladder.DecayHalfLifeDuration() + if now.Sub(record.LastEvalAt.Time) <= halfLife { + return record.Tier + } + if EffectiveCompleted(int(record.Completed), &record.LastEvalAt.Time, now, halfLife) < float64(ladder.ProbationEvals) { + return monetizeapi.EvaluatorTierProbation + } + return record.Tier +} diff --git a/internal/bounty/decay_test.go b/internal/bounty/decay_test.go new file mode 100644 index 00000000..8ba44a42 --- /dev/null +++ b/internal/bounty/decay_test.go @@ -0,0 +1,115 @@ +package bounty + +import ( + "math" + "testing" + "time" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const halfLife = 720 * time.Hour + +func TestEffectiveCompleted_HalvesAfterOneHalfLife(t *testing.T) { + now := time.Date(2026, 6, 10, 0, 0, 0, 0, time.UTC) + last := now.Add(-halfLife) + got := EffectiveCompleted(10, &last, now, halfLife) + if math.Abs(got-5.0) > 1e-9 { + t.Fatalf("EffectiveCompleted after one half-life = %v, want 5.0", got) + } + last2 := now.Add(-2 * halfLife) + if got := EffectiveCompleted(10, &last2, now, halfLife); math.Abs(got-2.5) > 1e-9 { + t.Fatalf("EffectiveCompleted after two half-lives = %v, want 2.5", got) + } +} + +func TestEffectiveCompleted_NilLastEvalNoDecay(t *testing.T) { + now := time.Now() + if got := EffectiveCompleted(10, nil, now, halfLife); got != 10.0 { + t.Fatalf("legacy record (nil lastEvalAt) must not decay, got %v", got) + } +} + +func TestEffectiveCompleted_FreshAndZeroHalfLife(t *testing.T) { + now := time.Now() + fresh := now + if got := EffectiveCompleted(7, &fresh, now, halfLife); got != 7.0 { + t.Fatalf("zero idle must not decay, got %v", got) + } + future := now.Add(time.Hour) + if got := EffectiveCompleted(7, &future, now, halfLife); got != 7.0 
{ + t.Fatalf("clock-skewed future lastEvalAt must not decay, got %v", got) + } + old := now.Add(-halfLife) + if got := EffectiveCompleted(7, &old, now, 0); got != 7.0 { + t.Fatalf("non-positive half-life must disable decay, got %v", got) + } +} + +func TestEffectiveTier_StaleFullDemotedToProbation(t *testing.T) { + now := time.Date(2026, 6, 10, 0, 0, 0, 0, time.UTC) + ladder := Ladder{ProbationEvals: 10, DecayHalfLife: "720h"} + record := monetizeapi.EvaluatorLadderRecord{ + Tier: monetizeapi.EvaluatorTierFull, + Completed: 10, + LastEvalAt: &metav1.Time{Time: now.Add(-2 * halfLife)}, // effective 2.5 < 10 + } + if got := EffectiveTier(record, ladder, now); got != monetizeapi.EvaluatorTierProbation { + t.Fatalf("stale Full must read as Probation, got %s", got) + } +} + +func TestEffectiveTier_FreshFullStaysFull(t *testing.T) { + now := time.Now() + ladder := Ladder{ProbationEvals: 10, DecayHalfLife: "720h"} + record := monetizeapi.EvaluatorLadderRecord{ + Tier: monetizeapi.EvaluatorTierFull, + Completed: 10, + LastEvalAt: &metav1.Time{Time: now.Add(-halfLife / 2)}, // idle under the half-life + } + if got := EffectiveTier(record, ladder, now); got != monetizeapi.EvaluatorTierFull { + t.Fatalf("Full within the half-life must stay Full, got %s", got) + } +} + +func TestEffectiveTier_HighVolumeFullSurvivesIdle(t *testing.T) { + now := time.Now() + ladder := Ladder{ProbationEvals: 10, DecayHalfLife: "720h"} + record := monetizeapi.EvaluatorLadderRecord{ + Tier: monetizeapi.EvaluatorTierFull, + Completed: 100, // effective 25 after two half-lives, still ≥ 10 + LastEvalAt: &metav1.Time{Time: now.Add(-2 * halfLife)}, + } + if got := EffectiveTier(record, ladder, now); got != monetizeapi.EvaluatorTierFull { + t.Fatalf("high-volume Full must survive the idle window, got %s", got) + } +} + +func TestEffectiveTier_LegacyAndNonFullUntouched(t *testing.T) { + now := time.Now() + ladder := Ladder{ProbationEvals: 10, DecayHalfLife: "720h"} + legacy := 
 monetizeapi.EvaluatorLadderRecord{Tier: monetizeapi.EvaluatorTierFull, Completed: 1} + if got := EffectiveTier(legacy, ladder, now); got != monetizeapi.EvaluatorTierFull { + t.Fatalf("legacy record (nil lastEvalAt) must keep its stored tier, got %s", got) + } + shadow := monetizeapi.EvaluatorLadderRecord{ + Tier: monetizeapi.EvaluatorTierShadow, + LastEvalAt: &metav1.Time{Time: now.Add(-10 * halfLife)}, + } + if got := EffectiveTier(shadow, ladder, now); got != monetizeapi.EvaluatorTierShadow { + t.Fatalf("non-Full tiers are never demoted further, got %s", got) + } +} + +func TestDecayHalfLifeDuration(t *testing.T) { + if got := (Ladder{}).DecayHalfLifeDuration(); got != defaultDecayHalfLife { + t.Fatalf("zero ladder must default to %v, got %v", defaultDecayHalfLife, got) + } + if got := (Ladder{DecayHalfLife: "48h"}).DecayHalfLifeDuration(); got != 48*time.Hour { + t.Fatalf("parseable half-life = %v, want 48h", got) + } + if got := (Ladder{DecayHalfLife: "soon"}).DecayHalfLifeDuration(); got != defaultDecayHalfLife { + t.Fatalf("unparseable half-life must default, got %v", got) + } +} diff --git a/internal/bounty/registry.go b/internal/bounty/registry.go new file mode 100644 index 00000000..bba19057 --- /dev/null +++ b/internal/bounty/registry.go @@ -0,0 +1,246 @@ +// Package bounty loads the embedded, versioned ServiceBounty task-type +// packages (internal/embed/bountytasks/<name>/task.yaml). A task type is a +// self-describing unit — param schema, eval method + tolerance, OBOL eval +// pricing, hardware-proof policy, and the A2UI report schema — discovered +// dynamically the same way networks are (internal/embed/networks). Adding a +// task type is dropping in a directory; the CRD and controller never change. +package bounty + +import ( + "fmt" + "sort" + + "gopkg.in/yaml.v3" + + "github.com/ObolNetwork/obol-stack/internal/embed" +) + +// Param is one knob in a task type's schema. 
 It generates a CLI flag for +// `obol bounty post <type>` and is validated against spec.task.params. +type Param struct { + Name string `yaml:"name"` + Type string `yaml:"type"` // string | int | enum + Default string `yaml:"default"` + Enum []string `yaml:"enum"` + Required bool `yaml:"required"` // missing/empty value rejects the bounty at admission + Description string `yaml:"description"` +} + +// EvalPayment is the OBOL-denominated evaluator payment leg (separate from the +// reward — x402 cannot splice a fee out of the reward auth). +type EvalPayment struct { + Asset string `yaml:"asset"` + PerEvaluator string `yaml:"perEvaluator"` + FundedBy string `yaml:"fundedBy"` + Settle string `yaml:"settle"` +} + +// Ladder is the evaluator cold-start ladder (design doc §11.4): Shadow (free, +// randomly assigned, graded against the quorum median but never counted) → +// Probation (one reserved quorum seat at reduced pay, value-capped bounties +// only) → Full. Thresholds are per-task-type constants, not protocol globals. +type Ladder struct { + // ShadowAgreements within tolerance of the quorum median promote a + // shadow evaluator to Probation. + ShadowAgreements int `yaml:"shadowAgreements"` + + // ProbationEvals without divergence promote a probationer to Full. + ProbationEvals int `yaml:"probationEvals"` + + // ProbationValueCap is the reward (human units) above which no probation + // seat is offered — high-value bounties get an all-Full quorum. + ProbationValueCap string `yaml:"probationValueCap"` + + // RevealWindow is the commit→reveal duration; every commit closes before + // any reveal opens (selective-revelation guard). + RevealWindow string `yaml:"revealWindow"` + + // NonRevealPenalty grades a missing reveal; "outlier" treats it as a + // worst-case divergence so silent abstention is never the cheap exit. 
+ NonRevealPenalty string `yaml:"nonRevealPenalty"` + + // DecayHalfLife is the reputation half-life: ladder weight earned by an + // evaluator halves every window of inactivity past lastEvalAt. + DecayHalfLife string `yaml:"decayHalfLife"` + + // EscalationWindow is the second-round commit→reveal duration when a + // diverged quorum escalates to a fresh, larger panel. + EscalationWindow string `yaml:"escalationWindow"` + + // EscalationEpsilon is the knife-edge band: when the quorum median lands + // within epsilon score points of the pass threshold, the verdict + // escalates to a fresh 2k+1 panel instead of settling. 0 means "unset" + // and backfills to the default (5); use a NEGATIVE value to disable the + // knife-edge trigger for a task package. + EscalationEpsilon int `yaml:"escalationEpsilon"` +} + +type Eval struct { + DefaultK int `yaml:"defaultK"` + Selection string `yaml:"selection"` + Payment EvalPayment `yaml:"payment"` + Ladder Ladder `yaml:"ladder"` +} + +type Acceptance struct { + Method string `yaml:"method"` + CommitReveal bool `yaml:"commitReveal"` + Tolerance map[string]string `yaml:"tolerance"` +} + +type Artifact struct { + Name string `yaml:"name"` + Kind string `yaml:"kind"` + Required bool `yaml:"required"` +} + +// ReportVariant is one A2UI rendering of the deliverable's result data. +// kind=declarative is the lean default: an operations file (create_surface → +// update_components → update_data_model) the client renders natively from its +// compiled-in catalog — no custom code, no iframes. kind=mcp-app is the +// MCP-Apps escape hatch: `surface` is self-contained HTML served url_encoded +// inside an A2UI `custom` McpApp node's properties.content; the CLIENT +// supplies the double-iframe isolation (sandbox proxy + srcdoc inner frame, +// never allow-same-origin) — the server only ever returns JSON. 
+type ReportVariant struct { + Kind string `yaml:"kind"` // declarative | mcp-app + Surface string `yaml:"surface"` // file in the task package + CatalogID string `yaml:"catalogId"` // stable id negotiated against the client's supportedCatalogIds +} + +// Report carries the variants in preference order. The serving side (FE +// locally, the stack MCP server cross-party) picks the first variant whose +// catalogId the client advertises (a2ui catalog negotiation, locked per +// surface); a client matching nothing falls back to the raw artifacts. +type Report struct { + Variants []ReportVariant `yaml:"variants"` +} + +type Deliverable struct { + Report Report `yaml:"report"` + Gate string `yaml:"gate"` // local | mcp-x402 | sign-in-with-x + Artifacts []Artifact `yaml:"artifacts"` +} + +// TaskType is a parsed task-type package. +type TaskType struct { + ID string `yaml:"id"` + Version int `yaml:"version"` + Runner string `yaml:"runner"` + Enabled bool `yaml:"enabled"` + Summary string `yaml:"summary"` + Requires []string `yaml:"requires"` + Params []Param `yaml:"params"` + Acceptance Acceptance `yaml:"acceptance"` + Eval Eval `yaml:"eval"` + HardwareProof string `yaml:"hardwareProof"` + Deliverable Deliverable `yaml:"deliverable"` +} + +// Ref is the portable, versioned reference written into +// ServiceBounty.spec.task.typeRef, e.g. "benchmark@v1". +func (t TaskType) Ref() string { + return fmt.Sprintf("%s@v%d", t.ID, t.Version) +} + +// Load reads and parses a single embedded task-type package by directory name. 
+func Load(name string) (TaskType, error) { + raw, err := embed.ReadEmbeddedBountyTaskFile(name, "task.yaml") + if err != nil { + return TaskType{}, err + } + + var t TaskType + if err := yaml.Unmarshal(raw, &t); err != nil { + return TaskType{}, fmt.Errorf("parse task type %q: %w", name, err) + } + + if t.ID == "" { + return TaskType{}, fmt.Errorf("task type %q: missing id", name) + } + + applyLadderDefaults(&t.Eval.Ladder) + + return t, nil +} + +// applyLadderDefaults backfills ladder knobs a task package omits, so older +// packages keep working when the ladder grows a field. +func applyLadderDefaults(l *Ladder) { + if l.DecayHalfLife == "" { + l.DecayHalfLife = "720h" + } + if l.EscalationWindow == "" { + l.EscalationWindow = "30m" + } + if l.EscalationEpsilon == 0 { + l.EscalationEpsilon = 5 + } +} + +// Available returns every embedded task type (enabled or not), sorted by id. +func Available() ([]TaskType, error) { + names, err := embed.GetAvailableBountyTasks() + if err != nil { + return nil, err + } + + tasks := make([]TaskType, 0, len(names)) + for _, name := range names { + t, err := Load(name) + if err != nil { + return nil, err + } + tasks = append(tasks, t) + } + + sort.Slice(tasks, func(i, j int) bool { return tasks[i].ID < tasks[j].ID }) + + return tasks, nil +} + +// Enabled returns only the task types live in this release. Shipping a type +// with enabled:false stages it (e.g. finetune) before it is turned on. +func Enabled() ([]TaskType, error) { + all, err := Available() + if err != nil { + return nil, err + } + + enabled := make([]TaskType, 0, len(all)) + for _, t := range all { + if t.Enabled { + enabled = append(enabled, t) + } + } + + return enabled, nil +} + +// Resolve resolves an `id` ("benchmark") or a versioned ref ("benchmark@v1") +// to its task type. It errors if the type is unknown or disabled. 
+func Resolve(ref string) (TaskType, error) { + id := ref + for i := 0; i < len(ref); i++ { + if ref[i] == '@' { + id = ref[:i] + break + } + } + + all, err := Available() + if err != nil { + return TaskType{}, err + } + + for _, t := range all { + if t.ID == id { + if !t.Enabled { + return TaskType{}, fmt.Errorf("task type %q is not enabled in this release", id) + } + return t, nil + } + } + + return TaskType{}, fmt.Errorf("unknown task type %q", ref) +} diff --git a/internal/bounty/registry_test.go b/internal/bounty/registry_test.go new file mode 100644 index 00000000..3c57699c --- /dev/null +++ b/internal/bounty/registry_test.go @@ -0,0 +1,205 @@ +package bounty + +import ( + "testing" + + "github.com/ObolNetwork/obol-stack/internal/embed" +) + +func TestEnabled_IncludesBenchmark(t *testing.T) { + types, err := Enabled() + if err != nil { + t.Fatalf("Enabled: %v", err) + } + + var bench *TaskType + for i := range types { + if types[i].ID == "benchmark" { + bench = &types[i] + break + } + } + if bench == nil { + t.Fatalf("benchmark task type not enabled; got %d types", len(types)) + } + + if got := bench.Ref(); got != "benchmark@v1" { + t.Errorf("Ref() = %q, want benchmark@v1", got) + } + if bench.Acceptance.Method != "rerun-tolerance" { + t.Errorf("acceptance.method = %q, want rerun-tolerance (benchmarks are not bit-exact)", bench.Acceptance.Method) + } + if bench.Eval.Payment.Asset != "OBOL" { + t.Errorf("eval paid in %q, want OBOL (separate eval leg)", bench.Eval.Payment.Asset) + } + if bench.Eval.Payment.Settle != "batch-settlement" { + t.Errorf("eval settle = %q, want batch-settlement", bench.Eval.Payment.Settle) + } + if len(bench.Params) == 0 { + t.Error("benchmark has no params; CLI flags would be empty") + } + + // Median-of-k quorum: k must be >=3 whenever a probation seat can be + // occupied — the median absorbing one outlier is what makes the newcomer + // seat verdict-safe (design doc §11.4). 
+ if bench.Eval.DefaultK < 3 { + t.Errorf("eval.defaultK = %d, want >=3 (median-of-k with a probation seat)", bench.Eval.DefaultK) + } + + // Ladder thresholds are per-type constants; zero values would make the + // cold-start ladder unclimbable (no promotions) or the reveal window + // degenerate (no selective-revelation guard). + ladder := bench.Eval.Ladder + if ladder.ShadowAgreements <= 0 { + t.Errorf("ladder.shadowAgreements = %d, want >0", ladder.ShadowAgreements) + } + if ladder.ProbationEvals <= 0 { + t.Errorf("ladder.probationEvals = %d, want >0", ladder.ProbationEvals) + } + if ladder.ProbationValueCap == "" { + t.Error("ladder.probationValueCap is empty; probation seats would be unbounded by value") + } + if ladder.RevealWindow == "" { + t.Error("ladder.revealWindow is empty; commits and reveals would not be separated") + } + if ladder.NonRevealPenalty != "outlier" { + t.Errorf("ladder.nonRevealPenalty = %q, want outlier (non-reveal must cost >= divergence)", ladder.NonRevealPenalty) + } + if ladder.DecayHalfLife != "720h" { + t.Errorf("ladder.decayHalfLife = %q, want 720h (reputation must decay with inactivity)", ladder.DecayHalfLife) + } + if ladder.EscalationWindow != "30m" { + t.Errorf("ladder.escalationWindow = %q, want 30m (escalation rounds need their own reveal window)", ladder.EscalationWindow) + } + if ladder.EscalationEpsilon != 5 { + t.Errorf("ladder.escalationEpsilon = %d, want 5 (diverged quorums must escalate, not settle)", ladder.EscalationEpsilon) + } + + // Report variants drive a2ui catalog negotiation: the first variant whose + // catalogId the client advertises wins. The lean default is declarative; + // the mcp-app variant is what generic MCP-Apps hosts render (the server + // only serves JSON — double-iframe isolation is the client's job). 
+ variants := bench.Deliverable.Report.Variants + if len(variants) < 2 { + t.Fatalf("report has %d variants, want >=2 (declarative + mcp-app)", len(variants)) + } + if variants[0].Kind != "declarative" { + t.Errorf("first variant kind = %q, want declarative (the lean default must win negotiation)", variants[0].Kind) + } + hasMCPApp := false + for _, v := range variants { + if v.Kind == "mcp-app" { + hasMCPApp = true + } + if v.CatalogID == "" { + t.Errorf("variant %s/%s has empty catalogId; negotiation would never select it", v.Kind, v.Surface) + } + if _, err := embed.ReadEmbeddedBountyTaskFile("benchmark", v.Surface); err != nil { + t.Errorf("variant surface %q is not in the embedded package: %v", v.Surface, err) + } + } + if !hasMCPApp { + t.Error("no mcp-app variant; generic MCP-Apps clients would have no rendering") + } +} + +// applyLadderDefaults backfills knobs older task packages omit — without it a +// package missing decayHalfLife/escalationWindow/escalationEpsilon would have +// undecaying reputation and unescalatable verdicts. 
+func TestApplyLadderDefaults(t *testing.T) { + var l Ladder + applyLadderDefaults(&l) + if l.DecayHalfLife != "720h" { + t.Errorf("default decayHalfLife = %q, want 720h", l.DecayHalfLife) + } + if l.EscalationWindow != "30m" { + t.Errorf("default escalationWindow = %q, want 30m", l.EscalationWindow) + } + if l.EscalationEpsilon != 5 { + t.Errorf("default escalationEpsilon = %d, want 5", l.EscalationEpsilon) + } + + set := Ladder{DecayHalfLife: "24h", EscalationWindow: "1h", EscalationEpsilon: 9} + applyLadderDefaults(&set) + if set.DecayHalfLife != "24h" || set.EscalationWindow != "1h" || set.EscalationEpsilon != 9 { + t.Errorf("explicit ladder values overwritten: %+v", set) + } +} + +func TestResolve(t *testing.T) { + for _, ref := range []string{"benchmark", "benchmark@v1"} { + got, err := Resolve(ref) + if err != nil { + t.Errorf("Resolve(%q): %v", ref, err) + continue + } + if got.ID != "benchmark" { + t.Errorf("Resolve(%q).ID = %q", ref, got.ID) + } + } + + if _, err := Resolve("does-not-exist"); err == nil { + t.Error("Resolve(unknown) should error") + } +} + +// benchlocal@v1 wraps third-party BenchLocal packs — pack code IS the scorer +// and the BenchLocal registry has no checksums, so packCommit MUST be a +// required param: without a byte pin, rerun-tolerance verification is theater. 
+func TestEnabled_BenchlocalRequiresPackCommit(t *testing.T) { + bl, err := Resolve("benchlocal@v1") + if err != nil { + t.Fatalf("Resolve(benchlocal@v1): %v", err) + } + + required := map[string]bool{} + for _, p := range bl.Params { + if p.Required { + required[p.Name] = true + } + } + for _, name := range []string{"pack", "packVersion", "packCommit"} { + if !required[name] { + t.Errorf("param %s must be required (pack bytes are unpinned without it)", name) + } + } + if _, ok := bl.Acceptance.Tolerance["totalScore"]; !ok { + t.Error("benchlocal tolerance must key on totalScore (the BenchmarkScore primary metric)") + } +} + +// finetune@v1 ships staged: present in Available (schema reviewable), absent +// from Enabled (not postable), refused by Resolve (not claimable/admittable). +// This is the registry's whole staging mechanism — pin it. +func TestStaging_FinetuneShippedButDisabled(t *testing.T) { + all, err := Available() + if err != nil { + t.Fatalf("Available: %v", err) + } + var staged *TaskType + for i := range all { + if all[i].ID == "finetune" { + staged = &all[i] + } + } + if staged == nil { + t.Fatal("finetune package missing from Available — staging mechanism has nothing staged") + } + if staged.Enabled { + t.Fatal("finetune must ship enabled:false until the MLX-LoRA runner + held-out re-eval land") + } + + enabled, err := Enabled() + if err != nil { + t.Fatalf("Enabled: %v", err) + } + for _, e := range enabled { + if e.ID == "finetune" { + t.Error("Enabled() must exclude disabled packages") + } + } + + if _, err := Resolve("finetune"); err == nil { + t.Error("Resolve(finetune) must refuse disabled types at admission") + } +} diff --git a/internal/dataset/artifacts.go b/internal/dataset/artifacts.go new file mode 100644 index 00000000..adef340a --- /dev/null +++ b/internal/dataset/artifacts.go @@ -0,0 +1,56 @@ +package dataset + +import ( + "fmt" + "io" + "os" + "sync" + "time" +) + +// Artifacts resolves the raw bytes of a dataset version for 
serving. The +// returned reader is seekable so the HTTP layer (http.ServeContent) can honor +// Range requests; the caller closes it. +type Artifacts interface { + Open(version int) (content io.ReadSeeker, modtime time.Time, closeFn func() error, err error) +} + +// FileArtifacts serves each version from a file on disk. Versions are +// registered as they are published (Set), so a snapshot's bytes stay pinned +// to the path that priced and signed it. +type FileArtifacts struct { + mu sync.RWMutex + paths map[int]string +} + +// NewFileArtifacts returns an empty file-backed artifact store. +func NewFileArtifacts() *FileArtifacts { + return &FileArtifacts{paths: map[int]string{}} +} + +// Set registers the file path serving a version. +func (f *FileArtifacts) Set(version int, path string) { + f.mu.Lock() + defer f.mu.Unlock() + f.paths[version] = path +} + +// Open implements Artifacts. +func (f *FileArtifacts) Open(version int) (io.ReadSeeker, time.Time, func() error, error) { + f.mu.RLock() + path, ok := f.paths[version] + f.mu.RUnlock() + if !ok { + return nil, time.Time{}, nil, fmt.Errorf("dataset: no artifact registered for version %d", version) + } + file, err := os.Open(path) + if err != nil { + return nil, time.Time{}, nil, fmt.Errorf("dataset: open artifact v%d: %w", version, err) + } + info, err := file.Stat() + if err != nil { + file.Close() + return nil, time.Time{}, nil, fmt.Errorf("dataset: stat artifact v%d: %w", version, err) + } + return file, info.ModTime(), file.Close, nil +} diff --git a/internal/dataset/bundle.go b/internal/dataset/bundle.go new file mode 100644 index 00000000..84765380 --- /dev/null +++ b/internal/dataset/bundle.go @@ -0,0 +1,83 @@ +package dataset + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// BundleManifest is the minimal shape read from a dataset bundle directory's +// manifest.json: a content-address hash and the list of artifact file names. 
+// (The bundle is produced by an external dataset exporter; only this +// generic envelope is consumed here.) +type BundleManifest struct { + Hash string `json:"hash"` + Files []string `json:"files"` +} + +// ReadBundle reads /manifest.json, resolves the primary training +// artifact (a .jsonl file, preferring an instruction/sft-style name), and +// returns the manifest hash (the content-address anchor), the artifact's +// absolute path, its whole-file SHA-256, and its byte size. +func ReadBundle(dir string) (manifestHash, artifactPath, fileHash string, size int64, err error) { + data, err := os.ReadFile(filepath.Join(dir, "manifest.json")) + if err != nil { + return "", "", "", 0, fmt.Errorf("dataset: read manifest: %w", err) + } + var m BundleManifest + if err := json.Unmarshal(data, &m); err != nil { + return "", "", "", 0, fmt.Errorf("dataset: parse manifest: %w", err) + } + if len(m.Hash) != 64 { + return "", "", "", 0, fmt.Errorf("dataset: manifest hash must be 64 hex chars, got %d", len(m.Hash)) + } + artifact := pickArtifact(m.Files) + if artifact == "" { + return "", "", "", 0, fmt.Errorf("dataset: no .jsonl artifact listed in manifest") + } + artifactPath = filepath.Join(dir, artifact) + fileHash, size, err = hashFile(artifactPath) + if err != nil { + return "", "", "", 0, err + } + return strings.ToLower(m.Hash), artifactPath, fileHash, size, nil +} + +// pickArtifact chooses the training file: prefer one whose name signals an +// instruction/sft format, else the first .jsonl, else "". +func pickArtifact(files []string) string { + var firstJSONL string + for _, f := range files { + lf := strings.ToLower(f) + if !strings.HasSuffix(lf, ".jsonl") { + continue + } + if firstJSONL == "" { + firstJSONL = f + } + if strings.Contains(lf, "sft") || strings.Contains(lf, "instruct") { + return f + } + } + return firstJSONL +} + +// hashFile returns the lowercase hex SHA-256 and byte size of a file. 
+func hashFile(path string) (string, int64, error) { + f, err := os.Open(path) + if err != nil { + return "", 0, fmt.Errorf("dataset: open artifact: %w", err) + } + defer f.Close() + h := sha256.New() + n, err := io.Copy(h, f) + if err != nil { + return "", 0, fmt.Errorf("dataset: hash artifact: %w", err) + } + return hex.EncodeToString(h.Sum(nil)), n, nil +} diff --git a/internal/dataset/client.go b/internal/dataset/client.go new file mode 100644 index 00000000..7858b115 --- /dev/null +++ b/internal/dataset/client.go @@ -0,0 +1,131 @@ +package dataset + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "strconv" + "strings" +) + +// FetchResult reports what a verified download produced. +type FetchResult struct { + Version int + ManifestHash string + FileHash string + Bytes int64 + Resumed bool +} + +// FetchOptions configures a verified, resumable dataset download. +type FetchOptions struct { + BaseURL string // e.g. https://host (no trailing slash, no /dataset suffix) + ID string + Version int // 0 = server head + Token string + OutPath string + Client *http.Client +} + +// Fetch downloads a dataset version to OutPath with HTTP Range resume and +// verifies the whole-file SHA-256 against the X-Dataset-File-Hash header the +// server commits on every response. A partial OutPath+".part" from an earlier +// interrupted run is resumed rather than restarted. The verification is done +// once over the reassembled whole file (the hash is of the whole artifact, +// never a chunk). 
+func Fetch(ctx context.Context, opts FetchOptions) (FetchResult, error) { + if opts.Client == nil { + opts.Client = http.DefaultClient + } + part := opts.OutPath + ".part" + + have := int64(0) + if fi, err := os.Stat(part); err == nil { + have = fi.Size() + } + resumed := have > 0 + + url := strings.TrimSuffix(opts.BaseURL, "/") + "/dataset/" + opts.ID + "/download" + if opts.Version > 0 { + url += "?version=" + strconv.Itoa(opts.Version) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return FetchResult{}, err + } + req.Header.Set("Authorization", "Bearer "+opts.Token) + if have > 0 { + req.Header.Set("Range", fmt.Sprintf("bytes=%d-", have)) + } + + resp, err := opts.Client.Do(req) + if err != nil { + return FetchResult{}, err + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusOK: + // Server ignored Range (or fresh start): rewrite from scratch. + have = 0 + resumed = false + case http.StatusPartialContent: + // Append to the existing .part. 
+ default: + body, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) + return FetchResult{}, fmt.Errorf("dataset: download %s -> %d: %s", url, resp.StatusCode, strings.TrimSpace(string(body))) + } + + fileHash := strings.ToLower(resp.Header.Get("X-Dataset-File-Hash")) + manifestHash := strings.ToLower(resp.Header.Get("X-Dataset-Manifest-Hash")) + version, _ := strconv.Atoi(resp.Header.Get("X-Dataset-Version")) + if fileHash == "" { + return FetchResult{}, fmt.Errorf("dataset: server did not advertise X-Dataset-File-Hash; refusing unverifiable download") + } + + flag := os.O_CREATE | os.O_WRONLY + if have > 0 { + flag |= os.O_APPEND + } else { + flag |= os.O_TRUNC + } + f, err := os.OpenFile(part, flag, 0o644) + if err != nil { + return FetchResult{}, err + } + if _, err := io.Copy(f, resp.Body); err != nil { + f.Close() + return FetchResult{}, fmt.Errorf("dataset: stream body: %w", err) + } + if err := f.Close(); err != nil { + return FetchResult{}, err + } + + // Verify the reassembled whole file against the committed hash. + got, size, err := hashFile(part) + if err != nil { + return FetchResult{}, err + } + if got != fileHash { + return FetchResult{}, fmt.Errorf("dataset: file hash mismatch: got %s, advertised %s (corrupt or tampered)", got, fileHash) + } + if err := os.Rename(part, opts.OutPath); err != nil { + return FetchResult{}, fmt.Errorf("dataset: finalize download: %w", err) + } + return FetchResult{Version: version, ManifestHash: manifestHash, FileHash: fileHash, Bytes: size, Resumed: resumed}, nil +} + +// VerifyFile recomputes a file's SHA-256 and compares it to want. 
+func VerifyFile(path, want string) error { + got, _, err := hashFile(path) + if err != nil { + return err + } + if got != strings.ToLower(want) { + return fmt.Errorf("dataset: hash mismatch: got %s, want %s", got, want) + } + return nil +} diff --git a/internal/dataset/client_test.go b/internal/dataset/client_test.go new file mode 100644 index 00000000..9f237a3f --- /dev/null +++ b/internal/dataset/client_test.go @@ -0,0 +1,115 @@ +package dataset + +import ( + "bytes" + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" +) + +func TestFetch_DownloadsAndVerifies(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + httpSrv := httptest.NewServer(ts.srv.Handler()) + defer httpSrv.Close() + + out := filepath.Join(t.TempDir(), "ds-v1.jsonl") + res, err := Fetch(context.Background(), FetchOptions{ + BaseURL: httpSrv.URL, + ID: "ds", + Version: 1, + Token: ownerToken, // owner is a download superuser + OutPath: out, + }) + if err != nil { + t.Fatalf("Fetch: %v", err) + } + if res.Version != 1 || res.Resumed { + t.Errorf("result = %+v, want version 1, not resumed", res) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatalf("read out: %v", err) + } + if !bytes.Equal(got, ts.bytesV1) { + t.Error("downloaded file != artifact") + } + if res.FileHash != sha256hex(ts.bytesV1) { + t.Errorf("result hash = %q, want %q", res.FileHash, sha256hex(ts.bytesV1)) + } + if _, err := os.Stat(out + ".part"); !os.IsNotExist(err) { + t.Error(".part file should be removed after a successful finalize") + } +} + +func TestFetch_ResumesFromPartial(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + httpSrv := httptest.NewServer(ts.srv.Handler()) + defer httpSrv.Close() + + out := filepath.Join(t.TempDir(), "ds.jsonl") + // Simulate an interrupted earlier run: the first 10 bytes already on disk. 
+ if err := os.WriteFile(out+".part", ts.bytesV1[:10], 0o644); err != nil { + t.Fatal(err) + } + + res, err := Fetch(context.Background(), FetchOptions{ + BaseURL: httpSrv.URL, ID: "ds", Version: 1, Token: ownerToken, OutPath: out, + }) + if err != nil { + t.Fatalf("Fetch: %v", err) + } + if !res.Resumed { + t.Error("expected Resumed=true from a pre-existing .part") + } + got, _ := os.ReadFile(out) + if !bytes.Equal(got, ts.bytesV1) { + t.Errorf("resumed file = %q, want full artifact", got) + } +} + +func TestFetch_RejectsHashMismatch(t *testing.T) { + // A malicious/buggy server that serves the wrong bytes but advertises the + // real hash must be caught by the whole-file verification. + real := []byte("the-real-bytes\n") + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("X-Dataset-File-Hash", sha256hex(real)) + w.Header().Set("X-Dataset-Version", "1") + _, _ = w.Write([]byte("TAMPERED-DIFFERENT-BYTES\n")) + })) + defer srv.Close() + + out := filepath.Join(t.TempDir(), "ds.jsonl") + _, err := Fetch(context.Background(), FetchOptions{BaseURL: srv.URL, ID: "ds", Version: 1, Token: "t", OutPath: out}) + if err == nil { + t.Fatal("Fetch accepted bytes that don't match the advertised hash") + } + if _, statErr := os.Stat(out); !os.IsNotExist(statErr) { + t.Error("a failed verification must not leave a finalized output file") + } +} + +func TestFetch_RefusesUnverifiableDownload(t *testing.T) { + // No X-Dataset-File-Hash -> refuse (don't write an unverifiable file). 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte("anything")) + })) + defer srv.Close() + out := filepath.Join(t.TempDir(), "ds.jsonl") + if _, err := Fetch(context.Background(), FetchOptions{BaseURL: srv.URL, ID: "ds", Token: "t", OutPath: out}); err == nil { + t.Error("Fetch accepted a download with no file-hash commitment") + } +} + +func TestVerifyFile(t *testing.T) { + path := filepath.Join(t.TempDir(), "f") + _ = os.WriteFile(path, []byte("abc"), 0o644) + if err := VerifyFile(path, sha256hex([]byte("abc"))); err != nil { + t.Errorf("VerifyFile good: %v", err) + } + if err := VerifyFile(path, sha256hex([]byte("xyz"))); err == nil { + t.Error("VerifyFile should reject a wrong hash") + } +} diff --git a/internal/dataset/coverage_test.go b/internal/dataset/coverage_test.go new file mode 100644 index 00000000..ae5e4e67 --- /dev/null +++ b/internal/dataset/coverage_test.go @@ -0,0 +1,184 @@ +package dataset + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" +) + +func TestFileArtifacts(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "v1.jsonl") + want := []byte("line1\nline2\n") + if err := os.WriteFile(path, want, 0o644); err != nil { + t.Fatal(err) + } + fa := NewFileArtifacts() + fa.Set(1, path) + + rc, _, closeFn, err := fa.Open(1) + if err != nil { + t.Fatalf("Open(1): %v", err) + } + defer closeFn() + got := make([]byte, len(want)) + if _, err := rc.Read(got); err != nil { + t.Fatalf("read: %v", err) + } + if !bytes.Equal(got, want) { + t.Errorf("read %q, want %q", got, want) + } + if _, _, _, err := fa.Open(2); err == nil { + t.Error("Open(2) should error — no artifact registered") + } + if _, _, _, err := fa.Open(0); err == nil { + t.Error("Open(0) should error") + } +} + +func TestServer_InviteFlow_ApproveThenToken(t *testing.T) { + ts := newTestServer(t, MembershipInvite, nil) + h := ts.srv.Handler() + + // 
Worker requests a code (NOT auto-approved in invite mode). + cw := do(t, h, "POST", "/auth/device/code", "", nil) + var grant struct { + DeviceCode string `json:"device_code"` + UserCode string `json:"user_code"` + } + _ = json.Unmarshal(cw.Body.Bytes(), &grant) + + // Polling before approval yields authorization_pending (no token). + pw := postJSON(t, h, "/auth/device/token", `{"device_code":"`+grant.DeviceCode+`"}`, "") + if pw.Code != http.StatusOK { + t.Fatalf("pre-approval poll = %d", pw.Code) + } + + // Non-owner cannot approve. + if w := postJSON(t, h, "/auth/device/approve", `{"user_code":"`+grant.UserCode+`"}`, "not-owner"); w.Code != http.StatusUnauthorized { + t.Errorf("non-owner approve = %d, want 401", w.Code) + } + // Owner approves. + if w := postJSON(t, h, "/auth/device/approve", `{"user_code":"`+grant.UserCode+`"}`, ownerToken); w.Code != http.StatusOK { + t.Fatalf("owner approve = %d, body %s", w.Code, w.Body.String()) + } + // Bad approve body. + if w := postJSON(t, h, "/auth/device/approve", `{}`, ownerToken); w.Code != http.StatusBadRequest { + t.Errorf("empty approve = %d, want 400", w.Code) + } + + // Now polling mints a token. + tw := postJSON(t, h, "/auth/device/token", `{"device_code":"`+grant.DeviceCode+`"}`, "") + var tok struct { + Token string `json:"token"` + } + _ = json.Unmarshal(tw.Body.Bytes(), &tok) + if tok.Token == "" { + t.Fatal("no token after approval") + } + + // That member token can read the member-gated versions list. + vw := do(t, h, "GET", "/dataset/ds/versions", tok.Token, nil) + if vw.Code != http.StatusOK { + t.Errorf("member versions = %d, want 200", vw.Code) + } + // Bad device token request. 
+ if w := postJSON(t, h, "/auth/device/token", `{}`, ""); w.Code != http.StatusBadRequest { + t.Errorf("empty device token = %d, want 400", w.Code) + } +} + +func TestServer_ErrorPaths(t *testing.T) { + t.Run("join paid disabled when nil", func(t *testing.T) { + ts := newTestServer(t, MembershipInvite, nil) + w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid", "", nil) + if w.Code != http.StatusServiceUnavailable { + t.Errorf("= %d, want 503", w.Code) + } + }) + t.Run("join paid rejects payment error", func(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{err: errFake}) + w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid", "", nil) + if w.Code != http.StatusPaymentRequired { + t.Errorf("= %d, want 402", w.Code) + } + }) + t.Run("join paid unknown version", func(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{version: 99}) + w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid", "", nil) + if w.Code != http.StatusBadRequest { + t.Errorf("= %d, want 400", w.Code) + } + }) + t.Run("unknown dataset id 404", func(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + w := do(t, ts.srv.Handler(), "GET", "/dataset/other/versions", ownerToken, nil) + if w.Code != http.StatusNotFound { + t.Errorf("= %d, want 404", w.Code) + } + }) + t.Run("download bad version 404", func(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + h := ts.srv.Handler() + for _, q := range []string{"abc", "0", "99"} { + w := do(t, h, "GET", "/dataset/ds/download?version="+q, ownerToken, nil) + if w.Code != http.StatusNotFound { + t.Errorf("version=%q = %d, want 404", q, w.Code) + } + } + }) + t.Run("download artifact missing", func(t *testing.T) { + // Log has v1 but the artifact source has nothing. 
+ ts := newTestServer(t, MembershipOpen, nil) + ts.srv.artifacts = memArtifacts{data: map[int][]byte{}} + w := do(t, ts.srv.Handler(), "GET", "/dataset/ds/download?version=1", ownerToken, nil) + if w.Code != http.StatusNotFound { + t.Errorf("= %d, want 404", w.Code) + } + }) +} + +func TestRecoverSigner_BadInputs(t *testing.T) { + v := EthVerifier{} + var d [32]byte + if _, err := v.RecoverSigner(d, "zz"); err == nil { + t.Error("non-hex sig accepted") + } + if _, err := v.RecoverSigner(d, "abcd"); err == nil { + t.Error("wrong-length sig accepted") + } +} + +func TestLog_LenAndStorePath(t *testing.T) { + l := NewLog() + if l.Len() != 0 { + t.Errorf("empty Len = %d", l.Len()) + } + s := NewStore("/tmp/x.json") + if s.Path() != "/tmp/x.json" { + t.Errorf("Path = %q", s.Path()) + } +} + +// helpers + +var errFake = &fakeErr{} + +type fakeErr struct{} + +func (*fakeErr) Error() string { return "fake payment failure" } + +func postJSON(t *testing.T, h http.Handler, target, body, token string) *httptest.ResponseRecorder { + t.Helper() + r := httptest.NewRequest("POST", target, bytes.NewReader([]byte(body))) + if token != "" { + r.Header.Set("Authorization", "Bearer "+token) + } + w := httptest.NewRecorder() + h.ServeHTTP(w, r) + return w +} diff --git a/internal/dataset/entitlement.go b/internal/dataset/entitlement.go new file mode 100644 index 00000000..d3934eac --- /dev/null +++ b/internal/dataset/entitlement.go @@ -0,0 +1,77 @@ +package dataset + +import "sync" + +// Entitlement records what a single member token may download. A +// payment-minted token is scoped to the exact version it paid for; an +// owner-admitted worker gets MaxVersion = head (full access). The raw token +// is never stored — only its SHA-256 hash (the same hash groupauth keys on), +// so the entitlement map can be persisted without holding a credential. 
type Entitlement struct {
	TokenHash  string `json:"tokenHash"`
	GroupID    string `json:"groupID"`
	MaxVersion int    `json:"maxVersion"`
	PaidAtomic string `json:"paidAtomic,omitempty"`
	Label      string `json:"label,omitempty"`
}

// Entitlements is a mutex-guarded map from token hash to Entitlement. It
// enforces the version-scope invariant that the bare groupauth member() gate
// cannot express (member() only proves group membership, not which version
// was paid for).
type Entitlements struct {
	mu     sync.Mutex
	byHash map[string]Entitlement
}

// NewEntitlements returns an Entitlements with no grants recorded.
func NewEntitlements() *Entitlements {
	return &Entitlements{byHash: make(map[string]Entitlement)}
}

// Grant stores ent under its TokenHash, replacing any entitlement already
// recorded for that hash.
func (e *Entitlements) Grant(ent Entitlement) {
	e.mu.Lock()
	e.byHash[ent.TokenHash] = ent
	e.mu.Unlock()
}

// Lookup returns the entitlement recorded for tokenHash and whether one
// exists.
func (e *Entitlements) Lookup(tokenHash string) (Entitlement, bool) {
	e.mu.Lock()
	defer e.mu.Unlock()
	found, present := e.byHash[tokenHash]
	return found, present
}

// Allows reports whether tokenHash may download version, i.e. whether an
// entitlement exists and 1 <= version <= MaxVersion. An unknown token is
// never allowed.
func (e *Entitlements) Allows(tokenHash string, version int) bool {
	ent, known := e.Lookup(tokenHash)
	if !known {
		return false
	}
	if version < 1 {
		return false
	}
	return version <= ent.MaxVersion
}

// All returns a snapshot of every entitlement (for persistence). The order
// is unspecified.
func (e *Entitlements) All() []Entitlement {
	e.mu.Lock()
	defer e.mu.Unlock()
	snapshot := make([]Entitlement, 0, len(e.byHash))
	for hash := range e.byHash {
		snapshot = append(snapshot, e.byHash[hash])
	}
	return snapshot
}

// Load replaces the map from persisted entitlements (rehydration after restart).
+func (e *Entitlements) Load(ents []Entitlement) { + e.mu.Lock() + defer e.mu.Unlock() + e.byHash = make(map[string]Entitlement, len(ents)) + for _, ent := range ents { + e.byHash[ent.TokenHash] = ent + } +} diff --git a/internal/dataset/entitlement_test.go b/internal/dataset/entitlement_test.go new file mode 100644 index 00000000..02066b01 --- /dev/null +++ b/internal/dataset/entitlement_test.go @@ -0,0 +1,62 @@ +package dataset + +import ( + "reflect" + "sort" + "testing" +) + +func TestEntitlements_Allows(t *testing.T) { + e := NewEntitlements() + e.Grant(Entitlement{TokenHash: "v2tok", GroupID: "g", MaxVersion: 2}) + e.Grant(Entitlement{TokenHash: "headtok", GroupID: "g", MaxVersion: 5}) + + tests := []struct { + name string + tokenHash string + version int + want bool + }{ + {"paid v2 can fetch v1", "v2tok", 1, true}, + {"paid v2 can fetch v2", "v2tok", 2, true}, + {"paid v2 CANNOT fetch v3", "v2tok", 3, false}, + {"worker head can fetch v5", "headtok", 5, true}, + {"unknown token denied", "ghost", 1, false}, + {"version zero denied", "headtok", 0, false}, + {"negative version denied", "headtok", -1, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := e.Allows(tt.tokenHash, tt.version); got != tt.want { + t.Errorf("Allows(%q, %d) = %v, want %v", tt.tokenHash, tt.version, got, tt.want) + } + }) + } +} + +func TestEntitlements_LoadAllRoundTrip(t *testing.T) { + e := NewEntitlements() + in := []Entitlement{ + {TokenHash: "a", GroupID: "g", MaxVersion: 1, PaidAtomic: "1000"}, + {TokenHash: "b", GroupID: "g", MaxVersion: 3}, + } + e.Load(in) + + if ent, ok := e.Lookup("a"); !ok || ent.PaidAtomic != "1000" { + t.Errorf("Lookup(a) = %+v, %v", ent, ok) + } + got := e.All() + sort.Slice(got, func(i, j int) bool { return got[i].TokenHash < got[j].TokenHash }) + if !reflect.DeepEqual(got, in) { + t.Errorf("All() = %+v, want %+v", got, in) + } +} + +func TestEntitlements_GrantOverwrites(t *testing.T) { + e := NewEntitlements() + 
e.Grant(Entitlement{TokenHash: "a", MaxVersion: 1}) + e.Grant(Entitlement{TokenHash: "a", MaxVersion: 9}) // top-up to a newer version + if !e.Allows("a", 9) { + t.Error("Grant did not overwrite MaxVersion") + } +} diff --git a/internal/dataset/helpers_test.go b/internal/dataset/helpers_test.go new file mode 100644 index 00000000..bf197e79 --- /dev/null +++ b/internal/dataset/helpers_test.go @@ -0,0 +1,89 @@ +package dataset + +import ( + "os" + "path/filepath" + "testing" + + ethcrypto "github.com/ethereum/go-ethereum/crypto" +) + +func TestReadBundle(t *testing.T) { + dir := t.TempDir() + artifact := []byte(`{"messages":[]}` + "\n") + if err := os.WriteFile(filepath.Join(dir, "sft.jsonl"), artifact, 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "manifest.json"), + []byte(`{"hash":"`+hashA+`","files":["chatml.jsonl","sft.jsonl"]}`), 0o644); err != nil { + t.Fatal(err) + } + + mh, path, fh, size, err := ReadBundle(dir) + if err != nil { + t.Fatalf("ReadBundle: %v", err) + } + if mh != hashA { + t.Errorf("manifestHash = %q, want %q", mh, hashA) + } + if filepath.Base(path) != "sft.jsonl" { + t.Errorf("picked %q, want sft.jsonl (instruction-format preference)", filepath.Base(path)) + } + if fh != sha256hex(artifact) { + t.Errorf("fileHash = %q, want %q", fh, sha256hex(artifact)) + } + if size != int64(len(artifact)) { + t.Errorf("size = %d, want %d", size, len(artifact)) + } +} + +func TestReadBundle_Errors(t *testing.T) { + t.Run("missing manifest", func(t *testing.T) { + if _, _, _, _, err := ReadBundle(t.TempDir()); err == nil { + t.Error("missing manifest should error") + } + }) + t.Run("bad hash length", func(t *testing.T) { + dir := t.TempDir() + _ = os.WriteFile(filepath.Join(dir, "manifest.json"), []byte(`{"hash":"abcd","files":["a.jsonl"]}`), 0o644) + if _, _, _, _, err := ReadBundle(dir); err == nil { + t.Error("short hash should error") + } + }) + t.Run("no jsonl artifact", func(t *testing.T) { + dir := t.TempDir() + _ 
= os.WriteFile(filepath.Join(dir, "manifest.json"), []byte(`{"hash":"`+hashA+`","files":["readme.txt"]}`), 0o644) + if _, _, _, _, err := ReadBundle(dir); err == nil { + t.Error("no jsonl should error") + } + }) +} + +func TestLoadOrCreateKey(t *testing.T) { + path := filepath.Join(t.TempDir(), "keys", "ds.key") + + k1, err := LoadOrCreateKey(path) + if err != nil { + t.Fatalf("create: %v", err) + } + if _, err := os.Stat(path); err != nil { + t.Fatalf("key not persisted: %v", err) + } + // Second load returns the same key (stable owner identity). + k2, err := LoadOrCreateKey(path) + if err != nil { + t.Fatalf("reload: %v", err) + } + a1 := ethcrypto.PubkeyToAddress(k1.PublicKey) + a2 := ethcrypto.PubkeyToAddress(k2.PublicKey) + if a1 != a2 { + t.Errorf("reloaded key changed address: %s != %s", a1.Hex(), a2.Hex()) + } + + // Corrupt file -> error. + bad := filepath.Join(t.TempDir(), "bad.key") + _ = os.WriteFile(bad, []byte("not-a-key"), 0o600) + if _, err := LoadOrCreateKey(bad); err == nil { + t.Error("corrupt key file should error") + } +} diff --git a/internal/dataset/keyfile.go b/internal/dataset/keyfile.go new file mode 100644 index 00000000..19fadf66 --- /dev/null +++ b/internal/dataset/keyfile.go @@ -0,0 +1,43 @@ +package dataset + +import ( + "crypto/ecdsa" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "strings" + + ethcrypto "github.com/ethereum/go-ethereum/crypto" +) + +// LoadOrCreateKey loads a hex-encoded secp256k1 private key from path, or +// generates one and persists it (0600) on first use. This is the owner's +// dataset-signing key: it signs the version log and its address is the owner +// identity buyers pin via Verify. Operators who already control an on-chain +// registration key can point path at that key's hex to unify the identity. 
+func LoadOrCreateKey(path string) (*ecdsa.PrivateKey, error) { + data, err := os.ReadFile(path) + switch { + case err == nil: + key, perr := ethcrypto.HexToECDSA(strings.TrimSpace(string(data))) + if perr != nil { + return nil, fmt.Errorf("dataset: parse signing key %s: %w", path, perr) + } + return key, nil + case !os.IsNotExist(err): + return nil, fmt.Errorf("dataset: read signing key %s: %w", path, err) + } + + key, err := ethcrypto.GenerateKey() + if err != nil { + return nil, err + } + if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil { + return nil, err + } + if err := os.WriteFile(path, []byte(hex.EncodeToString(ethcrypto.FromECDSA(key))), 0o600); err != nil { + return nil, fmt.Errorf("dataset: persist signing key: %w", err) + } + return key, nil +} diff --git a/internal/dataset/server.go b/internal/dataset/server.go new file mode 100644 index 00000000..8fb1325c --- /dev/null +++ b/internal/dataset/server.go @@ -0,0 +1,447 @@ +package dataset + +import ( + "crypto/subtle" + "encoding/json" + "fmt" + "log/slog" + "net/http" + "strconv" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/research/groupauth" +) + +// Membership modes (mirror internal/research/server). +const ( + MembershipOpen = "open" + MembershipInvite = "invite" +) + +// PaymentValidator validates a forwarded proof-of-payment for the paid-join +// path. It runs ONLY behind the edge x402-verifier ForwardAuth (which has +// already proven a settled payment); its job is to confirm the payment binds +// to THIS dataset offer and to extract which version + atomic amount was paid. +// It must never be exposed as a raw public route. +type PaymentValidator interface { + Validate(r *http.Request, offerID string) (version int, atomic string, err error) +} + +// Config builds a Server. Log/Ents/Store/Artifacts are owned by the caller so +// the CLI can rehydrate them from disk before serving. 
+type Config struct { + ID string // dataset id; appears in the route and as the default group id + GroupID string // membership group id (defaults to ID) + Membership string // open | invite (default invite) + OwnerToken string // gates owner-only routes; also a download superuser + OwnerSigner string // 0x address that must have signed the version log (verify pins it) + Log *Log + Ents *Entitlements + Store *Store + Artifacts Artifacts + Payments PaymentValidator + Logger *slog.Logger +} + +// Server hosts one versioned dataset over an owner-run, membership-gated HTTP +// surface. Bytes never leave the owner machine un-gated. +type Server struct { + id string + groupID string + membership string + owner string + ownerSig string + auth *groupauth.Authority + log *Log + ents *Entitlements + store *Store + artifacts Artifacts + payments PaymentValidator + logger *slog.Logger +} + +// NewServer builds a Server from cfg, rehydrating the in-memory groupauth +// Authority from any persisted entitlements so paying members survive a +// restart. +func NewServer(cfg Config) *Server { + if cfg.Logger == nil { + cfg.Logger = slog.Default() + } + if cfg.Membership == "" { + cfg.Membership = MembershipInvite + } + if cfg.GroupID == "" { + cfg.GroupID = cfg.ID + } + if cfg.Log == nil { + cfg.Log = NewLog() + } + if cfg.Ents == nil { + cfg.Ents = NewEntitlements() + } + s := &Server{ + id: cfg.ID, + groupID: cfg.GroupID, + membership: cfg.Membership, + owner: cfg.OwnerToken, + ownerSig: strings.ToLower(strings.TrimSpace(cfg.OwnerSigner)), + auth: groupauth.New(), + log: cfg.Log, + ents: cfg.Ents, + store: cfg.Store, + artifacts: cfg.Artifacts, + payments: cfg.Payments, + logger: cfg.Logger, + } + // Rehydrate groupauth from persisted entitlements (verified by hash; the + // raw token is never needed). + for _, ent := range s.ents.All() { + s.auth.RegisterHash(ent.TokenHash, ent.GroupID, ent.Label) + } + return s +} + +// Handler returns the HTTP mux. 
+func (s *Server) Handler() http.Handler { + mux := http.NewServeMux() + mux.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) }) + + // Device-auth (public — the device_code is the secret, RFC 8628). + mux.HandleFunc("POST /auth/device/code", s.handleDeviceCode) + mux.HandleFunc("POST /auth/device/token", s.handleDeviceToken) + mux.HandleFunc("POST /auth/device/approve", s.ownerOnly(s.handleApprove)) + + // Paid join — payment mints a version-scoped member token. Behind the + // edge x402-verifier ForwardAuth, never a raw public route. + mux.HandleFunc("POST /dataset/{id}/join/paid", s.handleJoinPaid) + + // Member-gated reads. + mux.HandleFunc("GET /dataset/{id}/versions", s.member(s.handleVersions)) + mux.HandleFunc("GET /dataset/{id}/verify", s.member(s.handleVerify)) + + // Entitlement-gated (member + version) download with Range support. + mux.HandleFunc("GET /dataset/{id}/download", s.downloadGate(s.handleDownload)) + + // Owner-only operational view. 
+ mux.HandleFunc("GET /dataset/{id}/status", s.ownerOnly(s.handleStatus)) + return mux +} + +// --- device-auth (mirrors internal/research/server) --- + +func (s *Server) handleDeviceCode(w http.ResponseWriter, r *http.Request) { + var body struct { + Worker string `json:"worker"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + grant, err := s.auth.RequestCode(body.Worker) + if err != nil { + writeErr(w, http.StatusInternalServerError, "server_error", "failed to create code") + return + } + if s.membership == MembershipOpen { + _ = s.auth.Approve(s.groupID, grant.UserCode) + } + writeJSON(w, http.StatusOK, grant) +} + +func (s *Server) handleDeviceToken(w http.ResponseWriter, r *http.Request) { + var body struct { + DeviceCode string `json:"device_code"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.DeviceCode == "" { + writeErr(w, http.StatusBadRequest, "invalid_request", "device_code required") + return + } + res, err := s.auth.Poll(body.DeviceCode) + switch err { + case nil: + // A freshly-issued device-auth token is an owner-admitted worker: + // grant full access (MaxVersion = head) and persist it. 
+ if res.Token != "" { + head := 0 + if h, ok := s.log.Head(); ok { + head = h.Seq + } + s.ents.Grant(Entitlement{ + TokenHash: groupauth.HashToken(res.Token), + GroupID: s.groupID, + MaxVersion: head, + Label: "worker", + }) + s.persist() + } + writeJSON(w, http.StatusOK, res) + case groupauth.ErrExpired: + writeErr(w, http.StatusGone, "expired_token", "device code expired") + default: + writeErr(w, http.StatusNotFound, "invalid_grant", "device code not found") + } +} + +func (s *Server) handleApprove(w http.ResponseWriter, r *http.Request) { + var body struct { + UserCode string `json:"user_code"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.UserCode == "" { + writeErr(w, http.StatusBadRequest, "invalid_request", "user_code required") + return + } + switch err := s.auth.Approve(s.groupID, body.UserCode); err { + case nil: + writeJSON(w, http.StatusOK, map[string]string{"status": "approved"}) + case groupauth.ErrExpired: + writeErr(w, http.StatusGone, "expired_code", "code expired") + case groupauth.ErrAlreadyUsed: + writeErr(w, http.StatusConflict, "already_used", "code already used") + default: + writeErr(w, http.StatusNotFound, "invalid_code", "code not found") + } +} + +// --- paid join --- + +func (s *Server) handleJoinPaid(w http.ResponseWriter, r *http.Request) { + if !s.matchID(r) { + writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") + return + } + if s.payments == nil { + writeErr(w, http.StatusServiceUnavailable, "paid_join_disabled", "paid join not configured") + return + } + version, atomic, err := s.payments.Validate(r, s.id) + if err != nil { + writeErr(w, http.StatusPaymentRequired, "payment_required", err.Error()) + return + } + if _, ok := s.log.Get(version); !ok { + writeErr(w, http.StatusBadRequest, "unknown_version", fmt.Sprintf("version %d not published", version)) + return + } + raw, hash, err := s.auth.Mint(s.groupID, "paid-v"+strconv.Itoa(version)) + if err != nil { + 
writeErr(w, http.StatusInternalServerError, "server_error", "failed to mint token") + return + } + s.ents.Grant(Entitlement{ + TokenHash: hash, + GroupID: s.groupID, + MaxVersion: version, + PaidAtomic: atomic, + Label: "paid", + }) + s.persist() + s.logger.Info("paid join", "dataset", s.id, "version", version, "atomic", atomic) + writeJSON(w, http.StatusOK, map[string]any{"token": raw, "version": version}) +} + +// --- member-gated reads --- + +func (s *Server) handleVersions(w http.ResponseWriter, r *http.Request) { + if !s.matchID(r) { + writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "id": s.id, + "versions": s.log.Versions(), + }) +} + +func (s *Server) handleVerify(w http.ResponseWriter, r *http.Request) { + if !s.matchID(r) { + writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") + return + } + err := s.log.Verify(EthVerifier{}, s.ownerSig) + head := 0 + if h, ok := s.log.Head(); ok { + head = h.Seq + } + resp := map[string]any{"valid": err == nil, "head": head, "owner": s.ownerSig} + if err != nil { + resp["error"] = err.Error() + } + writeJSON(w, http.StatusOK, resp) +} + +// --- download --- + +func (s *Server) handleDownload(w http.ResponseWriter, r *http.Request, version int) { + v, ok := s.log.Get(version) + if !ok { + writeErr(w, http.StatusNotFound, "unknown_version", fmt.Sprintf("version %d not published", version)) + return + } + if s.artifacts == nil { + writeErr(w, http.StatusServiceUnavailable, "no_artifacts", "artifact source not configured") + return + } + content, modtime, closeFn, err := s.artifacts.Open(version) + if err != nil { + writeErr(w, http.StatusNotFound, "artifact_missing", err.Error()) + return + } + defer closeFn() + + // Whole-artifact commitments: sent on 200 AND 206 alike, so a resumed or + // multi-connection download verifies against the full-file hash after + // reassembly (the hash is of the 
whole file, never a chunk). + w.Header().Set("X-Dataset-Version", strconv.Itoa(v.Seq)) + w.Header().Set("X-Dataset-Manifest-Hash", v.ManifestHash) + w.Header().Set("X-Dataset-File-Hash", v.FileHash) + w.Header().Set("Content-Type", "application/x-ndjson") + + // http.ServeContent handles Accept-Ranges, Range -> 206, Content-Range, + // and conditional requests for us. + http.ServeContent(w, r, fmt.Sprintf("%s-v%d.jsonl", s.id, v.Seq), modtime, content) +} + +// downloadGate enforces group membership AND version entitlement before +// handing the resolved version to next. The owner token is a superuser. +func (s *Server) downloadGate(next func(http.ResponseWriter, *http.Request, int)) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !s.matchID(r) { + writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") + return + } + tok := bearer(r) + if tok == "" { + writeErr(w, http.StatusUnauthorized, "auth_required", "member token required") + return + } + version, ok := s.resolveVersion(r) + if !ok { + writeErr(w, http.StatusNotFound, "unknown_version", "requested version is not published") + return + } + if s.isOwner(tok) { + next(w, r, version) + return + } + gid, ok := s.auth.VerifyToken(tok) + if !ok || gid != s.groupID { + writeErr(w, http.StatusForbidden, "not_a_member", "token is not a member of this dataset") + return + } + if !s.ents.Allows(groupauth.HashToken(tok), version) { + writeErr(w, http.StatusForbidden, "version_not_entitled", + fmt.Sprintf("token is not entitled to version %d", version)) + return + } + next(w, r, version) + } +} + +// resolveVersion returns the requested ?version=N (default: head). ok==false +// when the log is empty or N is out of range. 
+func (s *Server) resolveVersion(r *http.Request) (int, bool) { + head, ok := s.log.Head() + if !ok { + return 0, false + } + q := strings.TrimSpace(r.URL.Query().Get("version")) + if q == "" { + return head.Seq, true + } + n, err := strconv.Atoi(q) + if err != nil || n < 1 || n > head.Seq { + return 0, false + } + return n, true +} + +// --- owner view --- + +func (s *Server) handleStatus(w http.ResponseWriter, r *http.Request) { + if !s.matchID(r) { + writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "id": s.id, + "groupID": s.groupID, + "membership": s.membership, + "versions": s.log.Versions(), + "entitlements": len(s.ents.All()), + }) +} + +// --- middleware + helpers --- + +func (s *Server) member(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + tok := bearer(r) + if tok == "" { + writeErr(w, http.StatusUnauthorized, "auth_required", "member token required") + return + } + if s.isOwner(tok) { + next(w, r) + return + } + gid, ok := s.auth.VerifyToken(tok) + if !ok || gid != s.groupID { + writeErr(w, http.StatusForbidden, "not_a_member", "token is not a member of this dataset") + return + } + next(w, r) + } +} + +func (s *Server) ownerOnly(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !s.isOwner(bearer(r)) { + writeErr(w, http.StatusUnauthorized, "owner_required", "owner token required") + return + } + next(w, r) + } +} + +func (s *Server) isOwner(tok string) bool { + return s.owner != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(s.owner)) == 1 +} + +// matchID returns true when the {id} path value addresses this dataset. 
+func (s *Server) matchID(r *http.Request) bool { + id := r.PathValue("id") + return id == "" || id == s.id +} + +// persist snapshots the log + entitlements to the backing store (best-effort; +// a persistence failure is logged but does not fail the request — the +// in-memory state is authoritative for the live process). +func (s *Server) persist() { + if s.store == nil { + return + } + st := State{ + ID: s.id, + GroupID: s.groupID, + Versions: s.log.Versions(), + Entitlements: s.ents.All(), + } + if err := s.store.Save(st); err != nil { + s.logger.Error("dataset persist failed", "dataset", s.id, "err", err) + } +} + +func bearer(r *http.Request) string { + h := r.Header.Get("Authorization") + if v, ok := strings.CutPrefix(h, "Bearer "); ok { + return strings.TrimSpace(v) + } + return "" +} + +func writeJSON(w http.ResponseWriter, code int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + _ = json.NewEncoder(w).Encode(v) +} + +func writeErr(w http.ResponseWriter, code int, kind, msg string) { + writeJSON(w, code, map[string]string{"error": kind, "message": msg}) +} diff --git a/internal/dataset/server_test.go b/internal/dataset/server_test.go new file mode 100644 index 00000000..9e898a68 --- /dev/null +++ b/internal/dataset/server_test.go @@ -0,0 +1,284 @@ +package dataset + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" +) + +const ownerToken = "owner-secret-token" + +// fakePayments stands in for the edge x402-verifier's forwarded proof. +type fakePayments struct { + version int + atomic string + err error +} + +func (f fakePayments) Validate(_ *http.Request, _ string) (int, string, error) { + return f.version, f.atomic, f.err +} + +// memArtifacts serves version bytes from memory (seekable for Range). 
+type memArtifacts struct{ data map[int][]byte } + +func (m memArtifacts) Open(version int) (io.ReadSeeker, time.Time, func() error, error) { + b, ok := m.data[version] + if !ok { + return nil, time.Time{}, nil, fmt.Errorf("no artifact v%d", version) + } + return bytes.NewReader(b), fixedTime, func() error { return nil }, nil +} + +func sha256hex(b []byte) string { + h := sha256.Sum256(b) + return hex.EncodeToString(h[:]) +} + +type testServer struct { + srv *Server + bytesV1 []byte + signer *EthSigner + store *Store +} + +func newTestServer(t *testing.T, membership string, payments PaymentValidator) testServer { + t.Helper() + signer := newTestSigner(t) + artifact := []byte(`{"messages":[{"role":"user","content":"hi"}]}` + "\n") + fileHash := sha256hex(artifact) + + log := NewLog() + if _, err := log.Append(hashA, fileHash, int64(len(artifact)), signer, fixedTime); err != nil { + t.Fatalf("append v1: %v", err) + } + store := NewStore(filepath.Join(t.TempDir(), "ds.json")) + srv := NewServer(Config{ + ID: "ds", + Membership: membership, + OwnerToken: ownerToken, + OwnerSigner: signer.SignerID(), + Log: log, + Ents: NewEntitlements(), + Store: store, + Artifacts: memArtifacts{data: map[int][]byte{1: artifact}}, + Payments: payments, + }) + return testServer{srv: srv, bytesV1: artifact, signer: signer, store: store} +} + +func do(t *testing.T, h http.Handler, method, target, token string, hdr map[string]string) *httptest.ResponseRecorder { + t.Helper() + r := httptest.NewRequest(method, target, nil) + if token != "" { + r.Header.Set("Authorization", "Bearer "+token) + } + for k, v := range hdr { + r.Header.Set(k, v) + } + w := httptest.NewRecorder() + h.ServeHTTP(w, r) + return w +} + +func TestServer_PaidJoinThenDownload(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: "1000"}) + h := ts.srv.Handler() + + // Pay -> mint a version-1 token. 
+ w := do(t, h, "POST", "/dataset/ds/join/paid", "", nil) + if w.Code != http.StatusOK { + t.Fatalf("join/paid = %d, body %s", w.Code, w.Body.String()) + } + var join struct { + Token string `json:"token"` + Version int `json:"version"` + } + if err := json.Unmarshal(w.Body.Bytes(), &join); err != nil { + t.Fatalf("join body: %v", err) + } + if join.Token == "" || join.Version != 1 { + t.Fatalf("join = %+v", join) + } + + // Download v1 with the minted token; verify whole-file hash matches. + dw := do(t, h, "GET", "/dataset/ds/download?version=1", join.Token, nil) + if dw.Code != http.StatusOK { + t.Fatalf("download = %d, body %s", dw.Code, dw.Body.String()) + } + if !bytes.Equal(dw.Body.Bytes(), ts.bytesV1) { + t.Error("downloaded bytes != artifact") + } + if got := dw.Header().Get("X-Dataset-File-Hash"); got != sha256hex(ts.bytesV1) { + t.Errorf("X-Dataset-File-Hash = %q, want %q", got, sha256hex(ts.bytesV1)) + } + if sha256hex(dw.Body.Bytes()) != dw.Header().Get("X-Dataset-File-Hash") { + t.Error("recomputed body hash != advertised file hash") + } + if dw.Header().Get("Accept-Ranges") != "bytes" { + t.Error("download did not advertise Range support") + } +} + +func TestServer_VersionScopeEnforced(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: "1000"}) + h := ts.srv.Handler() + + // Append a v2 to the log so ?version=2 is a real (but unpaid) version. + if _, err := ts.srv.log.Append(hashB, hashB, 5, ts.signer, fixedTime); err != nil { + t.Fatalf("append v2: %v", err) + } + + w := do(t, h, "POST", "/dataset/ds/join/paid", "", nil) // pays for v1 + var join struct{ Token string } + _ = json.Unmarshal(w.Body.Bytes(), &join) + + // v1 token may fetch v1... + if dw := do(t, h, "GET", "/dataset/ds/download?version=1", join.Token, nil); dw.Code != http.StatusOK { + t.Errorf("v1 token download v1 = %d, want 200", dw.Code) + } + // ...but is forbidden from v2. 
+ dw := do(t, h, "GET", "/dataset/ds/download?version=2", join.Token, nil) + if dw.Code != http.StatusForbidden { + t.Fatalf("v1 token download v2 = %d, want 403", dw.Code) + } + if got := errorKind(t, dw.Body.Bytes()); got != "version_not_entitled" { + t.Errorf("error = %q, want version_not_entitled", got) + } +} + +func TestServer_RangeReturns206WithWholeFileHash(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + h := ts.srv.Handler() + token := ownerToken // owner is a download superuser + + dw := do(t, h, "GET", "/dataset/ds/download?version=1", token, map[string]string{"Range": "bytes=0-3"}) + if dw.Code != http.StatusPartialContent { + t.Fatalf("range request = %d, want 206", dw.Code) + } + if !bytes.Equal(dw.Body.Bytes(), ts.bytesV1[:4]) { + t.Errorf("partial body = %q, want first 4 bytes %q", dw.Body.Bytes(), ts.bytesV1[:4]) + } + // Whole-file hash header must be present on 206 (commits to the full file). + if got := dw.Header().Get("X-Dataset-File-Hash"); got != sha256hex(ts.bytesV1) { + t.Errorf("206 X-Dataset-File-Hash = %q, want whole-file %q", got, sha256hex(ts.bytesV1)) + } +} + +func TestServer_GatesRejectNonMembersAndAnonymous(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{version: 1}) + h := ts.srv.Handler() + + if w := do(t, h, "GET", "/dataset/ds/download?version=1", "", nil); w.Code != http.StatusUnauthorized { + t.Errorf("anonymous download = %d, want 401", w.Code) + } + if w := do(t, h, "GET", "/dataset/ds/download?version=1", "obol-research-mt-bogus", nil); w.Code != http.StatusForbidden { + t.Errorf("non-member download = %d, want 403", w.Code) + } + if w := do(t, h, "GET", "/dataset/ds/status", "not-the-owner", nil); w.Code != http.StatusUnauthorized { + t.Errorf("non-owner status = %d, want 401", w.Code) + } + if w := do(t, h, "GET", "/dataset/ds/status", ownerToken, nil); w.Code != http.StatusOK { + t.Errorf("owner status = %d, want 200", w.Code) + } +} + +func 
TestServer_DeviceAuthAdmitGetsHeadAccess(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) // open: auto-approved on code request + h := ts.srv.Handler() + + // device code (auto-approved) -> token + cw := do(t, h, "POST", "/auth/device/code", "", nil) + var grant struct { + DeviceCode string `json:"device_code"` + } + _ = json.Unmarshal(cw.Body.Bytes(), &grant) + + r := httptest.NewRequest("POST", "/auth/device/token", bytes.NewReader([]byte(`{"device_code":"`+grant.DeviceCode+`"}`))) + tw := httptest.NewRecorder() + h.ServeHTTP(tw, r) + var tok struct { + Token string `json:"token"` + } + _ = json.Unmarshal(tw.Body.Bytes(), &tok) + if tok.Token == "" { + t.Fatalf("no token issued: %s", tw.Body.String()) + } + + if dw := do(t, h, "GET", "/dataset/ds/download?version=1", tok.Token, nil); dw.Code != http.StatusOK { + t.Errorf("admitted worker download = %d, want 200", dw.Code) + } +} + +func TestServer_VerifyReportsChainHealth(t *testing.T) { + ts := newTestServer(t, MembershipOpen, nil) + h := ts.srv.Handler() + + w := do(t, h, "GET", "/dataset/ds/verify", ownerToken, nil) + if w.Code != http.StatusOK { + t.Fatalf("verify = %d", w.Code) + } + var res struct { + Valid bool `json:"valid"` + Head int `json:"head"` + } + _ = json.Unmarshal(w.Body.Bytes(), &res) + if !res.Valid || res.Head != 1 { + t.Errorf("verify = %+v, want valid head=1", res) + } +} + +func TestServer_RehydratesPaidMemberAfterRestart(t *testing.T) { + ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: "1000"}) + h := ts.srv.Handler() + + jw := do(t, h, "POST", "/dataset/ds/join/paid", "", nil) + var join struct{ Token string } + _ = json.Unmarshal(jw.Body.Bytes(), &join) + + // Simulate a restart: load persisted state into a brand-new server. 
+ st, err := ts.store.Load() + if err != nil { + t.Fatalf("load: %v", err) + } + if len(st.Entitlements) != 1 { + t.Fatalf("persisted entitlements = %d, want 1", len(st.Entitlements)) + } + restarted := NewServer(Config{ + ID: "ds", Membership: MembershipInvite, OwnerToken: ownerToken, + OwnerSigner: ts.signer.SignerID(), + Log: LogFromVersions(st.Versions), + Ents: loadEnts(st.Entitlements), + Store: ts.store, + Artifacts: memArtifacts{data: map[int][]byte{1: ts.bytesV1}}, + Payments: fakePayments{version: 1}, + }) + + // The pre-restart token still works — the member did not have to re-pay. + if dw := do(t, restarted.Handler(), "GET", "/dataset/ds/download?version=1", join.Token, nil); dw.Code != http.StatusOK { + t.Errorf("post-restart download = %d, want 200 (rehydration failed)", dw.Code) + } +} + +func loadEnts(ents []Entitlement) *Entitlements { + e := NewEntitlements() + e.Load(ents) + return e +} + +func errorKind(t *testing.T, body []byte) string { + t.Helper() + var e struct { + Error string `json:"error"` + } + _ = json.Unmarshal(body, &e) + return e.Error +} diff --git a/internal/dataset/signer.go b/internal/dataset/signer.go new file mode 100644 index 00000000..d30240ad --- /dev/null +++ b/internal/dataset/signer.go @@ -0,0 +1,59 @@ +package dataset + +import ( + "crypto/ecdsa" + "encoding/hex" + "fmt" + "strings" + + ethcrypto "github.com/ethereum/go-ethereum/crypto" +) + +// EthSigner signs version digests with a secp256k1 key — the same key kind +// used for the owner's on-chain (ERC-8004) registration. No new key custody: +// the caller supplies the already-loaded key. +type EthSigner struct { + priv *ecdsa.PrivateKey + addr string +} + +// NewEthSigner wraps a secp256k1 private key as a Signer. +func NewEthSigner(priv *ecdsa.PrivateKey) *EthSigner { + return &EthSigner{ + priv: priv, + addr: strings.ToLower(ethcrypto.PubkeyToAddress(priv.PublicKey).Hex()), + } +} + +// SignerID returns the lowercased 0x EVM address of the signing key. 
+func (s *EthSigner) SignerID() string { return s.addr } + +// SignDigest returns a 65-byte [R||S||V] secp256k1 signature, hex-encoded. +func (s *EthSigner) SignDigest(digest [32]byte) (string, error) { + sig, err := ethcrypto.Sign(digest[:], s.priv) + if err != nil { + return "", err + } + return hex.EncodeToString(sig), nil +} + +// EthVerifier recovers the signer's EVM address from a secp256k1 signature. +// It is the zero value of an empty struct — stateless and reusable. +type EthVerifier struct{} + +// RecoverSigner recovers the lowercased 0x EVM address that produced sigHex +// over digest. +func (EthVerifier) RecoverSigner(digest [32]byte, sigHex string) (string, error) { + sig, err := hex.DecodeString(sigHex) + if err != nil { + return "", fmt.Errorf("decode signature: %w", err) + } + if len(sig) != 65 { + return "", fmt.Errorf("signature length %d, want 65", len(sig)) + } + pub, err := ethcrypto.SigToPub(digest[:], sig) + if err != nil { + return "", fmt.Errorf("recover pubkey: %w", err) + } + return strings.ToLower(ethcrypto.PubkeyToAddress(*pub).Hex()), nil +} diff --git a/internal/dataset/store.go b/internal/dataset/store.go new file mode 100644 index 00000000..a1a7390f --- /dev/null +++ b/internal/dataset/store.go @@ -0,0 +1,89 @@ +package dataset + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sync" +) + +// State is the restart-surviving state of a dataset server: its signed +// version log plus the member-token entitlements. Persisting the entitlements +// (by token hash) lets a continuously-sold dataset rehydrate paying members +// after a host restart instead of forcing every subscriber to re-pay. +type State struct { + ID string `json:"id"` + GroupID string `json:"groupID"` + Versions []DatasetVersion `json:"versions"` + Entitlements []Entitlement `json:"entitlements"` + // Artifacts maps a version Seq to the operator-local file path serving + // its bytes. 
It is local-only metadata (not part of the signed log), so a + // `publish` can rebuild its file-backed artifact source from disk. + Artifacts map[int]string `json:"artifacts,omitempty"` +} + +// Store persists State to a JSON file using an atomic temp-file + rename, so a +// crash mid-write never leaves a torn file (matches internal/network/record.go). +type Store struct { + path string + mu sync.Mutex +} + +// NewStore returns a Store backed by path. +func NewStore(path string) *Store { return &Store{path: path} } + +// Path is the backing file path. +func (s *Store) Path() string { return s.path } + +// Load reads the persisted state. A missing file is not an error — it returns +// the zero State so a fresh dataset starts clean. +func (s *Store) Load() (State, error) { + s.mu.Lock() + defer s.mu.Unlock() + data, err := os.ReadFile(s.path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return State{}, nil + } + return State{}, fmt.Errorf("dataset: read store %s: %w", s.path, err) + } + var st State + if err := json.Unmarshal(data, &st); err != nil { + return State{}, fmt.Errorf("dataset: parse store %s: %w", s.path, err) + } + return st, nil +} + +// Save atomically writes state: marshal -> temp file in the same dir -> fsync +// via close -> rename over the target. 
+func (s *Store) Save(st State) error { + s.mu.Lock() + defer s.mu.Unlock() + data, err := json.MarshalIndent(st, "", " ") + if err != nil { + return fmt.Errorf("dataset: marshal store: %w", err) + } + dir := filepath.Dir(s.path) + if err := os.MkdirAll(dir, 0o755); err != nil { + return fmt.Errorf("dataset: mkdir %s: %w", dir, err) + } + tmp, err := os.CreateTemp(dir, ".dataset-*.tmp") + if err != nil { + return fmt.Errorf("dataset: create temp: %w", err) + } + tmpName := tmp.Name() + defer os.Remove(tmpName) // no-op once the rename succeeds + if _, err := tmp.Write(data); err != nil { + tmp.Close() + return fmt.Errorf("dataset: write temp: %w", err) + } + if err := tmp.Close(); err != nil { + return fmt.Errorf("dataset: close temp: %w", err) + } + if err := os.Rename(tmpName, s.path); err != nil { + return fmt.Errorf("dataset: rename temp over %s: %w", s.path, err) + } + return nil +} diff --git a/internal/dataset/store_test.go b/internal/dataset/store_test.go new file mode 100644 index 00000000..f2e3dac0 --- /dev/null +++ b/internal/dataset/store_test.go @@ -0,0 +1,68 @@ +package dataset + +import ( + "path/filepath" + "reflect" + "testing" + "time" +) + +func TestStore_SaveLoadRoundTrip(t *testing.T) { + path := filepath.Join(t.TempDir(), "nested", "state.json") + store := NewStore(path) + + want := State{ + ID: "pi-sessions", + GroupID: "grp-123", + Versions: []DatasetVersion{ + {Seq: 1, ManifestHash: hashA, FileHash: hashA, Size: 100, Timestamp: fixedTime, Signature: "sig1"}, + {Seq: 2, ManifestHash: hashB, FileHash: hashB, Size: 200, PrevSig: "sig1", Timestamp: fixedTime, Signature: "sig2"}, + }, + Entitlements: []Entitlement{{TokenHash: "tok", GroupID: "grp-123", MaxVersion: 2, PaidAtomic: "2000"}}, + } + + if err := store.Save(want); err != nil { + t.Fatalf("Save: %v", err) // also creates the nested dir + } + got, err := store.Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("round-trip mismatch:\n 
got %+v\nwant %+v", got, want) + } +} + +func TestStore_LoadMissingFileIsEmpty(t *testing.T) { + store := NewStore(filepath.Join(t.TempDir(), "does-not-exist.json")) + got, err := store.Load() + if err != nil { + t.Fatalf("Load of missing file should not error, got %v", err) + } + if !reflect.DeepEqual(got, State{}) { + t.Errorf("missing-file Load = %+v, want zero State", got) + } +} + +func TestStore_SaveOverwritesAtomically(t *testing.T) { + path := filepath.Join(t.TempDir(), "state.json") + store := NewStore(path) + + if err := store.Save(State{ID: "v1", Versions: []DatasetVersion{{Seq: 1, ManifestHash: hashA, FileHash: hashA, Timestamp: fixedTime}}}); err != nil { + t.Fatalf("first Save: %v", err) + } + if err := store.Save(State{ID: "v2", Versions: []DatasetVersion{ + {Seq: 1, ManifestHash: hashA, FileHash: hashA, Timestamp: fixedTime}, + {Seq: 2, ManifestHash: hashB, FileHash: hashB, Timestamp: fixedTime.Add(time.Hour)}, + }}); err != nil { + t.Fatalf("second Save: %v", err) + } + + got, err := store.Load() + if err != nil { + t.Fatalf("Load: %v", err) + } + if got.ID != "v2" || len(got.Versions) != 2 { + t.Errorf("overwrite failed: got ID=%q versions=%d, want v2/2", got.ID, len(got.Versions)) + } +} diff --git a/internal/dataset/versionlog.go b/internal/dataset/versionlog.go new file mode 100644 index 00000000..e212906e --- /dev/null +++ b/internal/dataset/versionlog.go @@ -0,0 +1,244 @@ +// Package dataset implements the owner-hosted, membership-gated serving of +// versioned dataset artifacts: a signed, hash-chained version log +// (content-addressed by manifestHash), a token -> version entitlement map, +// and an HTTP server that streams artifacts to paying/approved members. 
+// +// The version log is a money/integrity primitive: each published version is +// signed by the owner's secp256k1 key (the same key used for on-chain +// registration) over a canonical, length-prefixed, domain-separated digest, +// and chained to its predecessor's signature so a third party replaying or +// reordering a fetched log is detectable offline. +package dataset + +import ( + "crypto/sha256" + "encoding/binary" + "encoding/hex" + "errors" + "fmt" + "strings" + "sync" + "time" +) + +// digestDST is the 32-byte domain-separation tag mixed into every version +// digest. It prevents a signature minted here from being replayed as a +// signature in any other obol protocol. The string is zero-padded to 32 bytes. +const digestDST = "obol/dataset/versionlog/v1" + +// DatasetVersion is one immutable, signed entry in a dataset's version log. +// The human version tag is Seq (v1, v2, ...). ManifestHash is the +// content-address anchor; FileHash is the SHA-256 of the whole served +// artifact; Size binds the per-version price. PrevSig chains to the +// predecessor's Signature ("" at Seq==1). +type DatasetVersion struct { + Seq int `json:"seq"` + ManifestHash string `json:"manifestHash"` + FileHash string `json:"fileHash"` + Size int64 `json:"size"` + PrevSig string `json:"prevSig"` + Timestamp time.Time `json:"timestamp"` + Signature string `json:"signature"` +} + +// Signer signs a 32-byte digest with the owner's key. SignerID is the +// signer's lowercased 0x EVM address (recoverable from the signature). +type Signer interface { + SignerID() string + SignDigest(digest [32]byte) (sigHex string, err error) +} + +// Verifier recovers the signer's EVM address from a signature over a digest. 
+type Verifier interface { + RecoverSigner(digest [32]byte, sigHex string) (signerID string, err error) +} + +// CanonicalDigest computes the SHA-256 digest the owner signs for v, using a +// fixed-width, length-prefixed, domain-separated encoding (NOT fmt/concat): +// +// SHA-256( DST[32] ‖ u64be(Seq) ‖ manifest[32] ‖ file[32] ‖ u64be(Size) +// ‖ u8(len(PrevSig)) ‖ PrevSig ) +// +// ManifestHash and FileHash MUST be exactly 64 lowercase-or-mixed hex chars; +// anything else is rejected so attacker-controlled width can never shift the +// encoding. +func CanonicalDigest(v DatasetVersion) ([32]byte, error) { + var zero [32]byte + if v.Seq < 1 { + return zero, fmt.Errorf("dataset: seq must be >= 1, got %d", v.Seq) + } + if v.Size < 0 { + return zero, fmt.Errorf("dataset: size must be >= 0, got %d", v.Size) + } + mh, err := decodeSHA256Hex(v.ManifestHash) + if err != nil { + return zero, fmt.Errorf("dataset: manifestHash: %w", err) + } + fh, err := decodeSHA256Hex(v.FileHash) + if err != nil { + return zero, fmt.Errorf("dataset: fileHash: %w", err) + } + if len(v.PrevSig) > 255 { + return zero, fmt.Errorf("dataset: prevSig too long (%d > 255)", len(v.PrevSig)) + } + + var dst [32]byte + copy(dst[:], digestDST) + + h := sha256.New() + h.Write(dst[:]) + var u64 [8]byte + binary.BigEndian.PutUint64(u64[:], uint64(v.Seq)) + h.Write(u64[:]) + h.Write(mh) + h.Write(fh) + binary.BigEndian.PutUint64(u64[:], uint64(v.Size)) + h.Write(u64[:]) + h.Write([]byte{byte(len(v.PrevSig))}) + h.Write([]byte(v.PrevSig)) + + var out [32]byte + copy(out[:], h.Sum(nil)) + return out, nil +} + +// decodeSHA256Hex requires exactly 64 hex chars (a SHA-256) and returns the +// 32 raw bytes. +func decodeSHA256Hex(s string) ([]byte, error) { + if len(s) != 64 { + return nil, fmt.Errorf("want 64 hex chars, got %d", len(s)) + } + b, err := hex.DecodeString(s) + if err != nil { + return nil, err + } + return b, nil +} + +// Log is an append-only, in-memory sequence of signed dataset versions. 
+// Safe for concurrent use. +type Log struct { + mu sync.Mutex + versions []DatasetVersion +} + +// NewLog returns an empty log. +func NewLog() *Log { return &Log{} } + +// LogFromVersions rebuilds a log from persisted versions (e.g. loaded from +// disk). The versions are NOT re-verified here; call Verify for that. +func LogFromVersions(versions []DatasetVersion) *Log { + cp := make([]DatasetVersion, len(versions)) + copy(cp, versions) + return &Log{versions: cp} +} + +// Len returns the number of versions. +func (l *Log) Len() int { + l.mu.Lock() + defer l.mu.Unlock() + return len(l.versions) +} + +// Versions returns a copy of every version in sequence order. +func (l *Log) Versions() []DatasetVersion { + l.mu.Lock() + defer l.mu.Unlock() + out := make([]DatasetVersion, len(l.versions)) + copy(out, l.versions) + return out +} + +// Head returns the latest version, or ok==false when the log is empty. +func (l *Log) Head() (DatasetVersion, bool) { + l.mu.Lock() + defer l.mu.Unlock() + if len(l.versions) == 0 { + return DatasetVersion{}, false + } + return l.versions[len(l.versions)-1], true +} + +// Get returns the version with the given Seq (1-based), or ok==false. +func (l *Log) Get(seq int) (DatasetVersion, bool) { + l.mu.Lock() + defer l.mu.Unlock() + if seq < 1 || seq > len(l.versions) { + return DatasetVersion{}, false + } + return l.versions[seq-1], true +} + +// Append builds the next version (Seq = len+1, chained to the prior +// signature), signs it with signer, and appends it. The hex digests are +// lowercased for stable byte-comparison downstream. 
+func (l *Log) Append(manifestHash, fileHash string, size int64, signer Signer, now time.Time) (DatasetVersion, error) { + l.mu.Lock() + defer l.mu.Unlock() + + prevSig := "" + if n := len(l.versions); n > 0 { + prevSig = l.versions[n-1].Signature + } + v := DatasetVersion{ + Seq: len(l.versions) + 1, + ManifestHash: strings.ToLower(strings.TrimSpace(manifestHash)), + FileHash: strings.ToLower(strings.TrimSpace(fileHash)), + Size: size, + PrevSig: prevSig, + Timestamp: now.UTC(), + } + digest, err := CanonicalDigest(v) + if err != nil { + return DatasetVersion{}, err + } + sig, err := signer.SignDigest(digest) + if err != nil { + return DatasetVersion{}, fmt.Errorf("dataset: sign version %d: %w", v.Seq, err) + } + v.Signature = sig + l.versions = append(l.versions, v) + return v, nil +} + +// ErrEmptyLog is returned by Verify when there are no versions. +var ErrEmptyLog = errors.New("dataset: version log is empty") + +// Verify walks the log v1..head and rejects it unless every entry: has a +// monotonic Seq starting at 1, chains correctly to its predecessor's +// Signature, carries a signature recoverable to expectedSigner, and has a +// valid canonical digest. This detects offline any reorder, middle removal, +// or tamper of a fetched log. (Tail truncation yields a valid shorter chain +// and is caught instead by comparing the advertised head version, not here.) +// expectedSigner == "" skips the owner-identity check (signature validity +// only). 
+func (l *Log) Verify(verifier Verifier, expectedSigner string) error { + l.mu.Lock() + defer l.mu.Unlock() + if len(l.versions) == 0 { + return ErrEmptyLog + } + want := strings.ToLower(strings.TrimSpace(expectedSigner)) + prevSig := "" + for i, v := range l.versions { + if v.Seq != i+1 { + return fmt.Errorf("dataset: version at index %d has non-monotonic seq %d", i, v.Seq) + } + if v.PrevSig != prevSig { + return fmt.Errorf("dataset: version %d: chain break (prevSig does not match predecessor)", v.Seq) + } + digest, err := CanonicalDigest(v) + if err != nil { + return fmt.Errorf("dataset: version %d: %w", v.Seq, err) + } + signer, err := verifier.RecoverSigner(digest, v.Signature) + if err != nil { + return fmt.Errorf("dataset: version %d: bad signature: %w", v.Seq, err) + } + if want != "" && strings.ToLower(signer) != want { + return fmt.Errorf("dataset: version %d: signed by %s, want owner %s", v.Seq, signer, want) + } + prevSig = v.Signature + } + return nil +} diff --git a/internal/dataset/versionlog_test.go b/internal/dataset/versionlog_test.go new file mode 100644 index 00000000..3a8c1559 --- /dev/null +++ b/internal/dataset/versionlog_test.go @@ -0,0 +1,208 @@ +package dataset + +import ( + "strings" + "testing" + "time" + + ethcrypto "github.com/ethereum/go-ethereum/crypto" +) + +const ( + hashA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + hashB = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + hashC = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" +) + +func newTestSigner(t *testing.T) *EthSigner { + t.Helper() + key, err := ethcrypto.GenerateKey() + if err != nil { + t.Fatalf("GenerateKey: %v", err) + } + return NewEthSigner(key) +} + +var fixedTime = time.Date(2026, 6, 14, 12, 0, 0, 0, time.UTC) + +func TestCanonicalDigest_RejectsMalformedHashes(t *testing.T) { + tests := []struct { + name string + v DatasetVersion + }{ + {"short manifest", DatasetVersion{Seq: 1, ManifestHash: "abcd", 
FileHash: hashB}}, + {"non-hex manifest", DatasetVersion{Seq: 1, ManifestHash: strings.Repeat("z", 64), FileHash: hashB}}, + {"short file", DatasetVersion{Seq: 1, ManifestHash: hashA, FileHash: "ff"}}, + {"empty file", DatasetVersion{Seq: 1, ManifestHash: hashA, FileHash: ""}}, + {"seq zero", DatasetVersion{Seq: 0, ManifestHash: hashA, FileHash: hashB}}, + {"negative size", DatasetVersion{Seq: 1, ManifestHash: hashA, FileHash: hashB, Size: -1}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := CanonicalDigest(tt.v); err == nil { + t.Errorf("CanonicalDigest(%+v) = nil err, want rejection", tt.v) + } + }) + } +} + +func TestCanonicalDigest_DeterministicAndFieldSensitive(t *testing.T) { + base := DatasetVersion{Seq: 2, ManifestHash: hashA, FileHash: hashB, Size: 1048576, PrevSig: "deadbeef"} + d1, err := CanonicalDigest(base) + if err != nil { + t.Fatalf("digest: %v", err) + } + d2, _ := CanonicalDigest(base) + if d1 != d2 { + t.Fatal("digest not deterministic for identical input") + } + + // Each field change must move the digest (no canonicalization collision). + mutated := []DatasetVersion{ + {Seq: 3, ManifestHash: hashA, FileHash: hashB, Size: 1048576, PrevSig: "deadbeef"}, + {Seq: 2, ManifestHash: hashC, FileHash: hashB, Size: 1048576, PrevSig: "deadbeef"}, + {Seq: 2, ManifestHash: hashA, FileHash: hashC, Size: 1048576, PrevSig: "deadbeef"}, + {Seq: 2, ManifestHash: hashA, FileHash: hashB, Size: 1048577, PrevSig: "deadbeef"}, + {Seq: 2, ManifestHash: hashA, FileHash: hashB, Size: 1048576, PrevSig: "cafebabe"}, + } + for i, m := range mutated { + d, err := CanonicalDigest(m) + if err != nil { + t.Fatalf("mutated[%d] digest: %v", i, err) + } + if d == d1 { + t.Errorf("mutated[%d] produced the same digest as base — field not bound", i) + } + } +} + +// TestCanonicalDigest_NoConcatAmbiguity proves the length-prefix defeats the +// classic concat footgun: moving a char across the Size/PrevSig boundary must +// change the digest. 
(Two versions whose naive ":"-join would collide.) +func TestCanonicalDigest_NoConcatAmbiguity(t *testing.T) { + a := DatasetVersion{Seq: 1, ManifestHash: hashA, FileHash: hashB, Size: 12, PrevSig: "3abc"} + b := DatasetVersion{Seq: 1, ManifestHash: hashA, FileHash: hashB, Size: 123, PrevSig: "abc"} + da, _ := CanonicalDigest(a) + db, _ := CanonicalDigest(b) + if da == db { + t.Fatal("length-prefix failed: ambiguous Size/PrevSig boundary collided") + } +} + +func TestLog_AppendChainsAndVerifies(t *testing.T) { + signer := newTestSigner(t) + log := NewLog() + + v1, err := log.Append(hashA, hashA, 100, signer, fixedTime) + if err != nil { + t.Fatalf("append v1: %v", err) + } + if v1.Seq != 1 || v1.PrevSig != "" { + t.Errorf("v1 seq=%d prevSig=%q, want 1 and empty", v1.Seq, v1.PrevSig) + } + v2, err := log.Append(hashB, hashB, 200, signer, fixedTime) + if err != nil { + t.Fatalf("append v2: %v", err) + } + if v2.Seq != 2 || v2.PrevSig != v1.Signature { + t.Errorf("v2 not chained to v1: prevSig=%q want %q", v2.PrevSig, v1.Signature) + } + if _, err := log.Append(hashC, hashC, 300, signer, fixedTime); err != nil { + t.Fatalf("append v3: %v", err) + } + + if err := log.Verify(EthVerifier{}, signer.SignerID()); err != nil { + t.Errorf("Verify on a clean chain failed: %v", err) + } + if head, ok := log.Head(); !ok || head.Seq != 3 { + t.Errorf("Head seq = %v (ok=%v), want 3", head.Seq, ok) + } +} + +func TestLog_Verify_DetectsTamper(t *testing.T) { + signer := newTestSigner(t) + log := NewLog() + _, _ = log.Append(hashA, hashA, 100, signer, fixedTime) + _, _ = log.Append(hashB, hashB, 200, signer, fixedTime) + + // Mutate a field on the persisted copy and rebuild a log from it. 
+ versions := log.Versions() + versions[1].Size = 999999 // attacker inflates the size after signing + tampered := LogFromVersions(versions) + if err := tampered.Verify(EthVerifier{}, signer.SignerID()); err == nil { + t.Error("Verify accepted a tampered Size — signature not binding") + } +} + +func TestLog_Verify_DetectsReorderAndMiddleRemoval(t *testing.T) { + signer := newTestSigner(t) + log := NewLog() + _, _ = log.Append(hashA, hashA, 100, signer, fixedTime) + _, _ = log.Append(hashB, hashB, 200, signer, fixedTime) + _, _ = log.Append(hashC, hashC, 300, signer, fixedTime) + versions := log.Versions() + + t.Run("reorder", func(t *testing.T) { + swapped := []DatasetVersion{versions[0], versions[2], versions[1]} + if err := LogFromVersions(swapped).Verify(EthVerifier{}, signer.SignerID()); err == nil { + t.Error("Verify accepted a reordered log") + } + }) + t.Run("middle removal", func(t *testing.T) { + gapped := []DatasetVersion{versions[0], versions[2]} + if err := LogFromVersions(gapped).Verify(EthVerifier{}, signer.SignerID()); err == nil { + t.Error("Verify accepted a log with a removed middle entry") + } + }) +} + +func TestLog_Verify_RejectsWrongSigner(t *testing.T) { + owner := newTestSigner(t) + attacker := newTestSigner(t) + log := NewLog() + _, _ = log.Append(hashA, hashA, 100, attacker, fixedTime) // signed by the wrong key + + if err := log.Verify(EthVerifier{}, owner.SignerID()); err == nil { + t.Error("Verify accepted a version signed by a non-owner key") + } + // ...but accepts it when we don't pin the owner (signature-validity only). 
+ if err := log.Verify(EthVerifier{}, ""); err != nil { + t.Errorf("Verify with no owner pin rejected a structurally valid sig: %v", err) + } +} + +func TestLog_Verify_EmptyLog(t *testing.T) { + if err := NewLog().Verify(EthVerifier{}, ""); err != ErrEmptyLog { + t.Errorf("empty log Verify = %v, want ErrEmptyLog", err) + } +} + +// FuzzCanonicalDigest asserts the encoder never panics on arbitrary hash +// inputs and is total: valid 64-hex pairs always digest, everything else is a +// clean error. +func FuzzCanonicalDigest(f *testing.F) { + f.Add(hashA, hashB, int64(100), "") + f.Add("abcd", "", int64(0), "ff") + f.Add(strings.Repeat("0", 64), strings.Repeat("f", 64), int64(1<<40), strings.Repeat("9", 130)) + f.Fuzz(func(t *testing.T, manifest, file string, size int64, prevSig string) { + v := DatasetVersion{Seq: 1, ManifestHash: manifest, FileHash: file, Size: size, PrevSig: prevSig} + _, err := CanonicalDigest(v) + valid := len(manifest) == 64 && len(file) == 64 && + isHex(manifest) && isHex(file) && size >= 0 && len(prevSig) <= 255 + if valid && err != nil { + t.Errorf("valid input rejected: %v", err) + } + if !valid && err == nil { + t.Errorf("invalid input accepted: manifest=%q file=%q size=%d prevSigLen=%d", manifest, file, size, len(prevSig)) + } + }) +} + +func isHex(s string) bool { + for _, c := range s { + if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + return false + } + } + return true +} diff --git a/internal/defaults/defaults.go b/internal/defaults/defaults.go index 9083180d..96a4973a 100644 --- a/internal/defaults/defaults.go +++ b/internal/defaults/defaults.go @@ -138,6 +138,7 @@ var devLocallyBuiltImageBases = []string{ "ghcr.io/obolnetwork/x402-verifier", "ghcr.io/obolnetwork/serviceoffer-controller", "ghcr.io/obolnetwork/x402-buyer", + "ghcr.io/obolnetwork/x402-escrow", "ghcr.io/obolnetwork/demo-server", "ghcr.io/obolnetwork/obol-stack-public-storefront", } diff --git a/internal/defaults/defaults_test.go 
b/internal/defaults/defaults_test.go index 60d84ef3..67fd6260 100644 --- a/internal/defaults/defaults_test.go +++ b/internal/defaults/defaults_test.go @@ -41,11 +41,15 @@ func TestCopyInfrastructure_DevModeRewritesDigestPins(t *testing.T) { for _, base := range []string{ "ghcr.io/obolnetwork/x402-verifier", "ghcr.io/obolnetwork/serviceoffer-controller", + "ghcr.io/obolnetwork/x402-escrow", } { want := base + ":" + devTag if !strings.Contains(out, want) { t.Errorf("dev mode did not rewrite to %q in %s", want, x402Path) } + if strings.Contains(out, base+":"+devTag+"@sha256:") { + t.Errorf("dev mode left orphan @sha256: suffix on %s:%s in %s — regex missed the combo form", base, devTag, x402Path) + } } // The persisted dev tag MUST equal what was stamped into the manifests, or @@ -79,6 +83,51 @@ func TestCopyInfrastructure_DevModeRewritesDigestPins(t *testing.T) { } } +// TestRewriteDevDigestPins_ComboFormAllBases pins the rewrite behaviour for +// every locally-built base — including ghcr.io/obolnetwork/x402-escrow — +// against all three pin styles, with the combo `@sha256:` form +// exercised explicitly. The embedded manifests don't carry every base in +// every style (x402-escrow ships tag-only until the first publish), so this +// synthetic file guarantees a future digest bump can't resurrect the +// orphan-@sha256 bug for a base the real tree happens not to cover today. 
+func TestRewriteDevDigestPins_ComboFormAllBases(t *testing.T) { + dir := t.TempDir() + + digest := strings.Repeat("ab12", 16) // 64 hex chars + var lines []string + for _, base := range devLocallyBuiltImageBases { + lines = append(lines, + "image: "+base+":b13254e@sha256:"+digest, // combo tag+digest + "image: "+base+"@sha256:"+digest, // digest-only + "image: "+base+":b13254e", // short-SHA tag + ) + } + path := filepath.Join(dir, "synthetic.yaml") + if err := os.WriteFile(path, []byte(strings.Join(lines, "\n")+"\n"), 0o600); err != nil { + t.Fatalf("write synthetic manifest: %v", err) + } + + if err := rewriteDevDigestPins(dir, "dev-test"); err != nil { + t.Fatalf("rewriteDevDigestPins: %v", err) + } + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read rewritten manifest: %v", err) + } + out := string(data) + + if strings.Contains(out, "@sha256:") { + t.Errorf("rewrite left a @sha256: pin behind (orphan-suffix combo bug):\n%s", out) + } + for _, base := range devLocallyBuiltImageBases { + want := "image: " + base + ":dev-test" + if got := strings.Count(out, want); got != 3 { + t.Errorf("base %s: %d of 3 pin styles rewritten to %q:\n%s", base, got, want, out) + } + } +} + func TestDevImageTag_Format(t *testing.T) { // Tests run inside the git checkout, so expect dev-; tolerate the // :latest fallback for non-git build environments. diff --git a/internal/embed/bountytasks/benchlocal/report.a2ui.json b/internal/embed/bountytasks/benchlocal/report.a2ui.json new file mode 100644 index 00000000..fd53f2a1 --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/report.a2ui.json @@ -0,0 +1,65 @@ +{ + "$comment": "benchlocal@v1 deliverable as an ordered A2UI v1.0-candidate message list against the STANDARD basic catalog. The runner fills updateDataModel from the pack's BenchmarkScore (totalScore + categories) with display-ready strings; machine truth lives in the results.json artifact. 
Category rows bind relative to each /categories item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-benchlocal-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-benchlocal-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": [ + "title", "subtitle", "verdict", "div-score", + "total-score", "categories-list", "scenarios-summary", + "div-provenance", "provenance" + ] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-score", "component": "Divider" }, + { "id": "total-score", "component": "Text", "variant": "h3", "text": { "path": "/totalScore" } }, + { + "id": "categories-list", "component": "List", "direction": "vertical", + "children": { "componentId": "category-row", "path": "/categories" } + }, + { + "id": "category-row", "component": "Row", + "children": ["c-label", "c-score"] + }, + { "id": "c-label", "component": "Text", "text": { "path": "/label" } }, + { "id": "c-score", "component": "Text", "text": { "path": "/score" } }, + { "id": "scenarios-summary", "component": "Text", "variant": "caption", "text": { "path": "/scenarios/summary" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + }, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-benchlocal-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + 
"model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "totalScore": "", + "categories": [ + { "label": "", "score": "" } + ], + "scenarios": { "summary": "" }, + "provenance": { "line": "" } + } + } + } + ] +} diff --git a/internal/embed/bountytasks/benchlocal/report.app.html b/internal/embed/bountytasks/benchlocal/report.app.html new file mode 100644 index 00000000..d1978d75 --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/report.app.html @@ -0,0 +1,37 @@ + + + + + + {{model.name}} — benchlocal@v1 + + + +

{{model.name}} {{model.subtitle}}

+

{{verdict.label}}

+

{{totalScore}}

+ + + {{categories.rows}} +
categoryscore
+

{{scenarios.summary}}

+
{{provenance.line}}
+ + diff --git a/internal/embed/bountytasks/benchlocal/task.yaml b/internal/embed/bountytasks/benchlocal/task.yaml new file mode 100644 index 00000000..fbc29e49 --- /dev/null +++ b/internal/embed/bountytasks/benchlocal/task.yaml @@ -0,0 +1,94 @@ +# benchlocal@v1 — a ServiceBounty task type wrapping BenchLocal bench packs +# (github.com/stevibe/BenchLocal): installable TypeScript packs (ToolCall-15, +# CLI-40, BugFind-15, …) run against any OpenAI-compatible endpoint, emitting +# per-scenario pass|partial|fail and an aggregated BenchmarkScore +# {totalScore, categories[{id,label,score,weight}]}. +# +# Integrity note (the reason packCommit is REQUIRED): the BenchLocal registry +# resolves packs as {id, version, source:{repo, tag}} with NO checksums or +# signatures, and a git tag is mutable. A pack is CODE — the scorer ships +# inside it — so rerun-tolerance verification is meaningless unless poster, +# fulfiller, and evaluators all run byte-identical pack bytes. The commit SHA +# (or archive sha256) is that pin. +id: benchlocal +version: 1 +runner: benchlocal # host-side runner drives the BenchLocal agent control API +enabled: true +summary: Run a pinned BenchLocal bench pack against a model on declared hardware; verified by independent re-run within tolerance. + +requires: + - benchlocal + +params: + - name: pack + type: string + required: true + description: BenchLocal pack id from the registry (e.g. toolcall-15, cli-40, bugfind-15). + - name: packVersion + type: string + required: true + description: Pack version (the registry source.tag, e.g. 1.0.0). + - name: packCommit + type: string + required: true + description: Git commit SHA (or archive sha256) pinning the pack bytes — the registry has no checksums and tags are mutable; evaluators must re-run identical scoring code. + - name: scenarios + type: string + description: Optional comma-separated scenario-id filter (default = the whole pack). 
+ - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible (e.g. RTX-4090, H100, M4-Max). + - name: temperature + type: string + default: "0" + description: Sampling temperature; BenchLocal packs default to 0 (greedy) — keep 0 for reproducible scoring. There is no seed surface in the bench protocol. + +# BenchmarkScore.totalScore is the primary gate; per-category bands (metric = +# the category id, e.g. overall) can be added per-bounty via --tolerance. +acceptance: + method: rerun-tolerance + commitReveal: true + tolerance: + totalScore: "0.05" + +eval: + defaultK: 3 # median-of-k; k>=3 whenever a probation seat is occupied + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + ladder: + shadowAgreements: 5 + probationEvals: 10 + probationValueCap: "50.00" + revealWindow: 10m + nonRevealPenalty: outlier + decayHalfLife: 720h + escalationWindow: 30m + escalationEpsilon: 5 + +# Pack scores are hardware-agnostic (pass/fail scoring), so self-report is the +# honest default; bounties pinning a specific GPU should post with +# --hardware-proof gpu-attestation or evaluator-measured. 
+hardwareProof: self-report + +deliverable: + report: + variants: + - kind: declarative + surface: report.a2ui.json + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + - kind: mcp-app + surface: report.app.html + catalogId: obol.org:mcp-app/v1 + gate: local # local | mcp-x402 | sign-in-with-x + artifacts: + - name: results.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true diff --git a/internal/embed/bountytasks/benchmark/report.a2ui.json b/internal/embed/bountytasks/benchmark/report.a2ui.json new file mode 100644 index 00000000..7cbc1140 --- /dev/null +++ b/internal/embed/bountytasks/benchmark/report.a2ui.json @@ -0,0 +1,80 @@ +{ + "$comment": "benchmark@v1 deliverable as an ordered A2UI v1.0-candidate message list (a2ui-project/a2ui, specification/v1_0). Targets the STANDARD basic catalog, so any v1.0 renderer (obol FE react renderer included) draws the scorecard — no custom catalog required. The runner fills updateDataModel with display-ready strings; machine-readable numbers live in the results.json artifact (kind: eval-report), which is the agent-facing source of truth. 
List rows bind relative to each /scores item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-benchmark-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-benchmark-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": [ + "title", "subtitle", "verdict", "div-scores", + "scores-header", "scores-list", "div-hardware", + "hw-title", "hw-class", "hw-proof", "hw-throughput", + "div-provenance", "provenance" + ] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-scores", "component": "Divider" }, + { + "id": "scores-header", "component": "Row", + "children": ["h-task", "h-claimed", "h-verified", "h-tolerance", "h-pass"] + }, + { "id": "h-task", "component": "Text", "variant": "caption", "text": "task" }, + { "id": "h-claimed", "component": "Text", "variant": "caption", "text": "claimed" }, + { "id": "h-verified", "component": "Text", "variant": "caption", "text": "verified" }, + { "id": "h-tolerance", "component": "Text", "variant": "caption", "text": "tolerance" }, + { "id": "h-pass", "component": "Text", "variant": "caption", "text": "pass" }, + { + "id": "scores-list", "component": "List", "direction": "vertical", + "children": { "componentId": "score-row", "path": "/scores" } + }, + { + "id": "score-row", "component": "Row", + "children": ["s-task", "s-claimed", "s-verified", "s-tolerance", "s-pass"] + }, + { "id": "s-task", "component": "Text", "text": { "path": "/task" } }, + { "id": "s-claimed", "component": "Text", "text": { "path": "/claimed" 
} }, + { "id": "s-verified", "component": "Text", "text": { "path": "/verified" } }, + { "id": "s-tolerance", "component": "Text", "text": { "path": "/tolerance" } }, + { "id": "s-pass", "component": "Text", "text": { "path": "/pass" } }, + { "id": "div-hardware", "component": "Divider" }, + { "id": "hw-title", "component": "Text", "variant": "h3", "text": "Hardware" }, + { "id": "hw-class", "component": "Text", "text": { "path": "/hardware/class" } }, + { "id": "hw-proof", "component": "Text", "text": { "path": "/hardware/proof" } }, + { "id": "hw-throughput", "component": "Text", "text": { "path": "/hardware/throughput" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + }, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-benchmark-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + "model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "scores": [ + { "task": "", "claimed": "", "verified": "", "tolerance": "", "pass": "" } + ], + "hardware": { "class": "", "proof": "", "throughput": "" }, + "provenance": { "line": "" } + } + } + } + ] +} diff --git a/internal/embed/bountytasks/benchmark/report.app.html b/internal/embed/bountytasks/benchmark/report.app.html new file mode 100644 index 00000000..3a0140d9 --- /dev/null +++ b/internal/embed/bountytasks/benchmark/report.app.html @@ -0,0 +1,40 @@ + + + + + + {{model.name}} — benchmark@v1 + + + +

{{model.name}} {{model.subtitle}}

+

{{verdict.label}}

+ + + {{scores.rows}} +
taskclaimedverifiedtolerancepass
+
+
hardwareClass
{{hardware.class}}
+
proof
{{hardware.proof}}
+
throughput
{{hardware.throughput}}
+
+
{{provenance.line}}
+ + diff --git a/internal/embed/bountytasks/benchmark/task.yaml b/internal/embed/bountytasks/benchmark/task.yaml new file mode 100644 index 00000000..4d4f3504 --- /dev/null +++ b/internal/embed/bountytasks/benchmark/task.yaml @@ -0,0 +1,118 @@ +# benchmark@v1 — a ServiceBounty task type. +# +# A task type is a self-describing, prepackaged unit (mirrors the dynamic +# network-install registry in internal/embed/networks/). It owns: the param +# schema that generates `obol bounty post benchmark` flags, the eval method + +# tolerance, the OBOL eval pricing, the hardware-proof policy, and the A2UI +# report schema. New task types drop in as a directory here — no CRD or CLI +# change. `enabled: false` ships a type that isn't live yet (e.g. finetune). +id: benchmark +version: 1 +runner: bench # the BountyRunner that fulfills it (host-side) +enabled: true +summary: Benchmark a model on a pinned harness; verified by independent re-run within tolerance. + +# Capability tags a fulfiller node must advertise to claim. +requires: + - benchmark + +# params → generated `obol bounty post benchmark` flags, validated against +# spec.task.params at admission. type: string|int|enum; enum lists choices. +params: + - name: tasks + type: string + default: "mmlu,gsm8k" + description: Comma-separated harness tasks to run. + - name: harness + type: string + default: "lm-eval-harness@v0.4.3" + description: Pinned eval harness (name@version), content-addressed. + - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible (e.g. any, M4-Max, H100, B200, RTX-4090); informational unless hardwareProof requires more. + - name: seed + type: string + default: "1234" + description: Decode seed; pinned to shrink nondeterminism (greedy decode assumed). + - name: dtype + type: enum + enum: [fp16, bf16, fp8, int8] + default: fp16 + description: Inference dtype. + +# acceptance — benchmarks are NOT bit-exact. 
An independent re-run must +# reproduce the claimed score within `tolerance`. commitHash is integrity +# (anti bait-and-switch), never a determinism gate. +acceptance: + method: rerun-tolerance + commitReveal: true + tolerance: # per-metric absolute score band; poster may tighten + mmlu: "0.01" + gsm8k: "0.015" + humaneval: "0.02" + +# eval market — a SEPARATE OBOL payment leg from the reward (x402 can't splice +# a fee out of the reward auth). Evaluators are paid for the WORK, pass or fail. +eval: + defaultK: 3 # median-of-k; k>=3 whenever a probation seat is occupied + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + # ladder — evaluator cold-start (design doc §11.4): Shadow (free, randomly + # assigned, graded against the quorum median but never counted) → Probation + # (one reserved quorum seat at ~50% pay, value-capped bounties only) → Full. + # Thresholds are per-type constants so each task type tunes its own on-ramp. + ladder: + shadowAgreements: 5 # shadow verdicts within tolerance of the quorum median → Probation + probationEvals: 10 # paid evals without divergence → Full + probationValueCap: "50.00" # reward (human units) above which no probation seat is offered + revealWindow: 10m # commit→reveal window; every commit closes before any reveal opens + nonRevealPenalty: outlier # non-reveal is graded as a worst-case outlier (>= divergence penalty) + decayHalfLife: 720h # reputation half-life: ladder weight halves per window of inactivity + escalationWindow: 30m # second-round commit→reveal window when a diverged quorum escalates + escalationEpsilon: 5 # max spread (score points) between counting reveals before escalation + +# hardwareProof — self-report is a reputation-backed CLAIM (forgeable text). +# Throughput-flavored bounties should require gpu-attestation or +# evaluator-measured; a score (e.g. mmlu) is hardware-agnostic so self-report +# is fine there. 
+hardwareProof: self-report + +# deliverable — A2UI renderings of the SAME verified result data, in +# preference order. The serving side (FE locally, the stack MCP server +# cross-party) picks the first variant whose catalogId the client advertises +# (a2ui catalog negotiation, locked per surface); no match → raw artifacts. +# declarative: operations JSON rendered natively from the client's +# compiled-in catalog — no custom code, no iframes (the lean default). +# mcp-app: MCP-Apps escape hatch for clients that don't know our catalog — +# self-contained HTML served url_encoded in a `custom` McpApp node's +# properties.content. The CLIENT supplies double-iframe isolation +# (sandbox proxy + srcdoc inner frame, never allow-same-origin); the +# server only ever returns JSON. Keep the HTML self-contained: the inner +# frame has no storage, no cookies, no same-origin access. +# Locally the agent persists the report under its hierarchy and the FE reads +# it; cross-party serves gate it behind mcp-x402 / SIWx (+payment for resale). 
+deliverable: + report: + variants: + - kind: declarative # A2UI v1.0-candidate messages against the + surface: report.a2ui.json # STANDARD basic catalog — any v1.0 renderer + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + - kind: mcp-app + surface: report.app.html # any MCP-Apps host renders this, sandboxed + catalogId: obol.org:mcp-app/v1 # domain-prefixed id (spec convention) for the McpApp custom-node rendering + gate: local # local | mcp-x402 | sign-in-with-x + artifacts: + - name: results.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true + - name: hw-attestation.json + kind: hardware-proof + required: false diff --git a/internal/embed/bountytasks/finetune/report.a2ui.json b/internal/embed/bountytasks/finetune/report.a2ui.json new file mode 100644 index 00000000..c03f6e0f --- /dev/null +++ b/internal/embed/bountytasks/finetune/report.a2ui.json @@ -0,0 +1,54 @@ +{ + "$comment": "finetune@v1 deliverable as an ordered A2UI v1.0-candidate message list against the STANDARD basic catalog. STAGED with the package (enabled: false); the runner fills updateDataModel from the held-out re-eval. 
Metric rows bind relative to each /metrics item.", + "messages": [ + { + "version": "v1.0", + "createSurface": { + "surfaceId": "obol-bounty-finetune-report", + "catalogId": "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + } + }, + { + "version": "v1.0", + "updateComponents": { + "surfaceId": "obol-bounty-finetune-report", + "components": [ + { "id": "root", "component": "Card", "child": "layout" }, + { + "id": "layout", "component": "Column", + "children": ["title", "subtitle", "verdict", "div-metrics", "metrics-list", "div-provenance", "provenance"] + }, + { "id": "title", "component": "Text", "variant": "h2", "text": { "path": "/model/name" } }, + { "id": "subtitle", "component": "Text", "variant": "caption", "text": { "path": "/model/subtitle" } }, + { "id": "verdict", "component": "Text", "variant": "h3", "text": { "path": "/verdict/label" } }, + { "id": "div-metrics", "component": "Divider" }, + { + "id": "metrics-list", "component": "List", "direction": "vertical", + "children": { "componentId": "metric-row", "path": "/metrics" } + }, + { "id": "metric-row", "component": "Row", "children": ["m-name", "m-value"] }, + { "id": "m-name", "component": "Text", "text": { "path": "/name" } }, + { "id": "m-value", "component": "Text", "text": { "path": "/value" } }, + { "id": "div-provenance", "component": "Divider" }, + { "id": "provenance", "component": "Text", "variant": "caption", "text": { "path": "/provenance/line" } } + ] + } + }, + { + "version": "v1.0", + "updateDataModel": { + "surfaceId": "obol-bounty-finetune-report", + "path": "/", + "value": { + "$comment": "Filled by the runner; the shape below is the contract (display-ready placeholders).", + "model": { "name": "", "subtitle": "" }, + "verdict": { "label": "" }, + "metrics": [ + { "name": "", "value": "" } + ], + "provenance": { "line": "" } + } + } + } + ] +} diff --git a/internal/embed/bountytasks/finetune/task.yaml b/internal/embed/bountytasks/finetune/task.yaml new file mode 
100644 index 00000000..635eec56 --- /dev/null +++ b/internal/embed/bountytasks/finetune/task.yaml @@ -0,0 +1,83 @@ +# finetune@v1 — STAGED (enabled: false). Ships in the binary so the schema and +# eval policy are reviewable, but it is not postable: `obol bounty post` only +# generates subcommands for enabled types and bounty.Resolve refuses disabled +# refs at admission. Flipped on when the MLX-LoRA runner + held-out re-eval +# verification land (fine-tunes verify by inference-only re-eval of the +# committed checkpoint — never by re-training, which is non-deterministic and +# cost-prohibitive). +id: finetune +version: 1 +runner: mlx-lora +enabled: false +summary: Fine-tune a model on a committed dataset; verified by held-out re-eval of the committed checkpoint. + +requires: + - finetune.mlx + +params: + - name: dataset + type: string + required: true + description: Content-addressed training dataset URI (e.g. ipfs://… or https://…#sha256=…). + - name: epochs + type: string + default: "3" + description: Training epochs. + - name: learningRate + type: string + default: "1e-4" + description: Learning rate. + - name: loraRank + type: string + default: "32" + description: LoRA adapter rank. + - name: hardwareClass + type: string + default: any + description: Declared hardware class — free-form so specific GPUs are expressible. + +# harness-rerun: evaluators re-eval the COMMITTED checkpoint (modelHash binds +# it at submit) on the held-out fraction; scores must land within tolerance. 
+acceptance: + method: harness-rerun + commitReveal: true + tolerance: + evalLoss: "0.05" + +eval: + defaultK: 3 + selection: vrf-reputation-weighted + payment: + asset: OBOL + perEvaluator: "2.00" + fundedBy: poster + settle: batch-settlement + ladder: + shadowAgreements: 5 + probationEvals: 10 + probationValueCap: "50.00" + revealWindow: 10m + nonRevealPenalty: outlier + decayHalfLife: 720h + escalationWindow: 30m + escalationEpsilon: 5 + +hardwareProof: self-report + +deliverable: + report: + variants: + - kind: declarative + surface: report.a2ui.json + catalogId: https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json + gate: local + artifacts: + - name: adapter.safetensors + kind: weights + required: true + - name: eval.json + kind: eval-report + required: true + - name: run.manifest + kind: provenance + required: true diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 1deb3df4..f2f1df76 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -29,6 +29,9 @@ var networksFS embed.FS //go:embed all:skills var skillsFS embed.FS +//go:embed all:bountytasks +var bountyTasksFS embed.FS + // InfrastructureDigest returns a stable digest of the embedded infrastructure // assets. Callers use this to decide whether an existing copied defaults tree // needs to be refreshed from the current binary. @@ -145,6 +148,40 @@ func ReadEmbeddedNetworkFile(networkName, filename string) ([]byte, error) { return content, nil } +// GetAvailableBountyTasks returns the names of all embedded ServiceBounty +// task-type packages (one directory per type under bountytasks/), e.g. +// "benchmark". Mirrors GetAvailableNetworks — drop in a directory to add a +// task type. 
+func GetAvailableBountyTasks() ([]string, error) { + entries, err := fs.ReadDir(bountyTasksFS, "bountytasks") + if err != nil { + return nil, fmt.Errorf("failed to read embedded bountytasks directory: %w", err) + } + + var tasks []string + + for _, entry := range entries { + if entry.IsDir() { + tasks = append(tasks, entry.Name()) + } + } + + return tasks, nil +} + +// ReadEmbeddedBountyTaskFile reads a file (e.g. "task.yaml", +// "report.a2ui.json") from an embedded task-type package. +func ReadEmbeddedBountyTaskFile(taskName, filename string) ([]byte, error) { + path := filepath.Join("bountytasks", taskName, filename) + + content, err := bountyTasksFS.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read %s from bounty task %s: %w", filename, taskName, err) + } + + return content, nil +} + // ReadInfrastructureFile reads a file from the embedded infrastructure directory func ReadInfrastructureFile(path string) ([]byte, error) { content, err := infrastructureFS.ReadFile(filepath.Join("infrastructure", path)) diff --git a/internal/embed/embed_bounty_crd_parity_test.go b/internal/embed/embed_bounty_crd_parity_test.go new file mode 100644 index 00000000..7a581a48 --- /dev/null +++ b/internal/embed/embed_bounty_crd_parity_test.go @@ -0,0 +1,181 @@ +package embed + +import ( + "fmt" + "reflect" + "sort" + "strings" + "testing" + + "gopkg.in/yaml.v3" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" +) + +// The ServiceBounty CRD manifest is hand-written (no controller-gen run), so +// every new Go field needs a matching schema property and vice versa. This +// test walks both directions: a Go json tag without a CRD property means +// kubectl silently strips the field on apply (structural-schema pruning); a +// CRD property without a Go field means stale schema the controller can never +// reconcile. spec.eval.mode was added by hand in two places — this makes that +// class of drift impossible. 
// leafTypes are struct types serialized as scalars in the CRD schema, keyed by
// their reflect.Type.String() form. collectGoPaths records a field of one of
// these types as a path but never descends into its fields.
var leafTypes = map[string]bool{
	"v1.Time":     true,
	"v1.Duration": true,
}

// collectGoPaths walks a struct type and records every reachable json path in
// out, each prefixed with prefix. Pointers are dereferenced first; a type that
// is not a struct, or that is listed in leafTypes, contributes nothing.
//
// Path rules:
//   - a field is named by the first element of its json tag; "-" is skipped;
//   - an embedded struct without a json name (untagged or `json:",inline"`)
//     is flattened into the parent, matching how encoding/json serializes it;
//   - any other field without a json name is skipped;
//   - slices and arrays of structs, including pointer elements such as []*T,
//     descend through "[]";
//   - maps are leaves (additionalProperties).
func collectGoPaths(t reflect.Type, prefix string, out map[string]bool) {
	for t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	if t.Kind() != reflect.Struct || leafTypes[t.String()] {
		return
	}
	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		tag, _, _ := strings.Cut(f.Tag.Get("json"), ",")
		if tag == "-" {
			continue
		}

		ft := f.Type
		for ft.Kind() == reflect.Pointer {
			ft = ft.Elem()
		}

		if tag == "" {
			// No json name: only an embedded struct contributes, and its
			// fields surface at the parent's level (same prefix). The
			// recursive call ignores non-struct and leaf types on its own.
			if f.Anonymous {
				collectGoPaths(ft, prefix, out)
			}
			continue
		}

		path := prefix + tag
		out[path] = true

		switch ft.Kind() {
		case reflect.Struct:
			collectGoPaths(ft, path+".", out)
		case reflect.Slice, reflect.Array:
			// Unwrap pointer elements so []*T is walked like []T; the schema
			// side descends through `items` either way.
			el := ft.Elem()
			for el.Kind() == reflect.Pointer {
				el = el.Elem()
			}
			collectGoPaths(el, path+"[].", out)
		}
	}
}

// collectSchemaPaths walks an openAPIV3Schema properties tree.
+func collectSchemaPaths(schema map[string]any, prefix string, out map[string]bool) { + props, _ := schema["properties"].(map[string]any) + for name, raw := range props { + path := prefix + name + out[path] = true + node, ok := raw.(map[string]any) + if !ok { + continue + } + if items, ok := node["items"].(map[string]any); ok { + collectSchemaPaths(items, path+"[].", out) + continue + } + collectSchemaPaths(node, path+".", out) + } +} + +func loadBountySchema(t *testing.T) map[string]any { + t.Helper() + return loadCRDSchema(t, "base/templates/servicebounty-crd.yaml") +} + +func loadCRDSchema(t *testing.T, path string) map[string]any { + t.Helper() + data, err := ReadInfrastructureFile(path) + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + var crd map[string]any + if err := yaml.Unmarshal(data, &crd); err != nil { + t.Fatalf("parse CRD: %v", err) + } + versions, _ := nested(crd, "spec", "versions").([]any) + if len(versions) == 0 { + t.Fatal("CRD has no versions") + } + v0, _ := versions[0].(map[string]any) + schema, _ := nested(v0, "schema", "openAPIV3Schema").(map[string]any) + if schema == nil { + t.Fatal("CRD has no openAPIV3Schema") + } + return schema +} + +func TestServiceBountyCRD_GoSchemaParity(t *testing.T) { + assertCRDParity(t, loadBountySchema(t), + reflect.TypeOf(monetizeapi.ServiceBountySpec{}), + reflect.TypeOf(monetizeapi.ServiceBountyStatus{})) +} + +// The EvaluatorEnrollment CRD is hand-written too — same drift class, same +// bidirectional pin. 
+func TestEvaluatorEnrollmentCRD_GoSchemaParity(t *testing.T) { + assertCRDParity(t, loadCRDSchema(t, "base/templates/evaluatorenrollment-crd.yaml"), + reflect.TypeOf(monetizeapi.EvaluatorEnrollmentSpec{}), + reflect.TypeOf(monetizeapi.EvaluatorEnrollmentStatus{})) +} + +func assertCRDParity(t *testing.T, schema map[string]any, specType, statusType reflect.Type) { + t.Helper() + for _, section := range []struct { + name string + goType reflect.Type + }{ + {"spec", specType}, + {"status", statusType}, + } { + sectionSchema, _ := nested(schema, "properties", section.name).(map[string]any) + if sectionSchema == nil { + t.Fatalf("CRD schema missing .%s", section.name) + } + + goPaths := map[string]bool{} + collectGoPaths(section.goType, "", goPaths) + schemaPaths := map[string]bool{} + collectSchemaPaths(sectionSchema, "", schemaPaths) + + var missing, stale []string + for p := range goPaths { + if !schemaPaths[p] { + missing = append(missing, p) + } + } + for p := range schemaPaths { + if !goPaths[p] { + stale = append(stale, p) + } + } + sort.Strings(missing) + sort.Strings(stale) + + for _, p := range missing { + t.Errorf("%s.%s exists in Go but not in the CRD schema — kubectl apply would silently prune it", section.name, p) + } + for _, p := range stale { + t.Errorf("%s.%s exists in the CRD schema but not in Go — stale property the controller can never reconcile", section.name, p) + } + } +} + +// TestServiceBountyCRD_EvalModeEnum pins the verification-gate enum: required +// must stay the default and dangerouslySkipped the only opt-out. 
+func TestServiceBountyCRD_EvalModeEnum(t *testing.T) { + schema := loadBountySchema(t) + mode, _ := nested(schema, "properties", "spec", "properties", "eval", "properties", "mode").(map[string]any) + if mode == nil { + t.Fatal("spec.eval.mode missing from CRD schema") + } + if d, _ := mode["default"].(string); d != monetizeapi.EvalModeRequired { + t.Errorf("spec.eval.mode default = %q, want %q (verification is on by default)", d, monetizeapi.EvalModeRequired) + } + enum, _ := mode["enum"].([]any) + got := fmt.Sprintf("%v", enum) + want := fmt.Sprintf("%v", []any{monetizeapi.EvalModeRequired, monetizeapi.EvalModeDangerouslySkipped}) + if got != want { + t.Errorf("spec.eval.mode enum = %s, want %s", got, want) + } +} diff --git a/internal/embed/embed_bounty_rbac_test.go b/internal/embed/embed_bounty_rbac_test.go new file mode 100644 index 00000000..ab22c4cf --- /dev/null +++ b/internal/embed/embed_bounty_rbac_test.go @@ -0,0 +1,119 @@ +package embed + +import ( + "strings" + "testing" +) + +// The bounty RBAC posture is a reviewed security decision (see +// plans/bounty-ane-marketplace-design.md, review fix #2): the controller gets +// cluster-wide watch/status on servicebounties, the AGENT grant is a +// NAMESPACED Role in the hermes mother namespace — never the cluster-wide +// openclaw-monetize-write ClusterRole. These tests pin that decision. 
+ +func TestBountyRBAC_ControllerClusterRoleIncludesServiceBounties(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/x402.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + docs := multiDoc(data) + var controllerRole map[string]any + for _, d := range docs { + if d["kind"] == "ClusterRole" && nested(d, "metadata", "name") == "serviceoffer-controller" { + controllerRole = d + break + } + } + if controllerRole == nil { + t.Fatal("serviceoffer-controller ClusterRole not found in x402.yaml") + } + + var hasBounties, hasBountyStatus bool + var hasEnrollments, hasEnrollmentStatus bool + var enrollmentVerbs []any + rules, _ := controllerRole["rules"].([]any) + for _, r := range rules { + rule, _ := r.(map[string]any) + resources, _ := rule["resources"].([]any) + for _, res := range resources { + switch res { + case "servicebounties": + hasBounties = true + case "servicebounties/status": + hasBountyStatus = true + case "evaluatorenrollments": + hasEnrollments = true + enrollmentVerbs, _ = rule["verbs"].([]any) + case "evaluatorenrollments/status": + hasEnrollmentStatus = true + } + } + } + if !hasBounties || !hasBountyStatus { + t.Errorf("serviceoffer-controller ClusterRole missing servicebounties (%v) or servicebounties/status (%v)", hasBounties, hasBountyStatus) + } + if !hasEnrollments || !hasEnrollmentStatus { + t.Errorf("serviceoffer-controller ClusterRole missing evaluatorenrollments (%v) or evaluatorenrollments/status (%v)", hasEnrollments, hasEnrollmentStatus) + } + // The controller READS the pool and writes ladder STATE only — it never + // creates or deletes enrollments (evaluators own their enrollment). 
+ for _, verb := range enrollmentVerbs { + if verb == "create" || verb == "delete" { + t.Errorf("controller must not %v evaluatorenrollments — the pool is evaluator-owned", verb) + } + } +} + +func TestBountyRBAC_AgentGrantIsNamespacedNotClusterWide(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/obol-agent-monetize-rbac.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + docs := multiDoc(data) + + // 1. The cluster-wide write ClusterRole must NOT mention servicebounties. + for _, d := range docs { + if d["kind"] != "ClusterRole" { + continue + } + name, _ := nested(d, "metadata", "name").(string) + rules, _ := d["rules"].([]any) + for _, r := range rules { + rule, _ := r.(map[string]any) + resources, _ := rule["resources"].([]any) + for _, res := range resources { + if s, _ := res.(string); strings.Contains(s, "servicebounties") { + t.Errorf("ClusterRole %q grants %q — bounty write must stay a namespaced Role", name, s) + } + } + } + } + + // 2. The namespaced Role exists, in the hermes mother namespace. 
+ var role map[string]any + for _, d := range docs { + if d["kind"] == "Role" && nested(d, "metadata", "name") == "hermes-bounty-write" { + role = d + break + } + } + if role == nil { + t.Fatal("namespaced Role hermes-bounty-write not found") + } + if ns := nested(role, "metadata", "namespace"); ns != "hermes-obol-agent" { + t.Errorf("hermes-bounty-write namespace = %v, want hermes-obol-agent", ns) + } + + var binding map[string]any + for _, d := range docs { + if d["kind"] == "RoleBinding" && nested(d, "metadata", "name") == "hermes-bounty-write-binding" { + binding = d + break + } + } + if binding == nil { + t.Fatal("RoleBinding hermes-bounty-write-binding not found") + } +} diff --git a/internal/embed/embed_crd_test.go b/internal/embed/embed_crd_test.go index 67f3b7e9..27cff1d5 100644 --- a/internal/embed/embed_crd_test.go +++ b/internal/embed/embed_crd_test.go @@ -293,6 +293,103 @@ func TestServiceOfferCRD_WalletValidation(t *testing.T) { } } +// TestServiceOfferCRD_SkillFields guards the type=skill marketplace +// schema: the enum value, the spec.skill block (bundle identity + +// integrity hash + bundle ConfigMap reference), and the spec-level CEL +// rule that makes spec.skill mandatory for skill offers. +func TestServiceOfferCRD_SkillFields(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/serviceoffer-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CRD document found") + } + + versions := nested(crd, "spec", "versions").([]any) + v0 := versions[0].(map[string]any) + spec, ok := nested(v0, "schema", "openAPIV3Schema", "properties", "spec").(map[string]any) + if !ok { + t.Fatal("spec schema missing") + } + props := spec["properties"].(map[string]any) + + // type enum gains "skill". 
+ typeProp := props["type"].(map[string]any) + gotEnum := map[string]bool{} + for _, e := range typeProp["enum"].([]any) { + gotEnum[e.(string)] = true + } + if !gotEnum["skill"] { + t.Errorf("spec.type.enum = %v, want it to include skill", typeProp["enum"]) + } + + // spec.skill block with required identity + integrity fields. + skill, ok := props["skill"].(map[string]any) + if !ok { + t.Fatal("spec.skill property missing") + } + required := map[string]bool{} + for _, r := range skill["required"].([]any) { + required[r.(string)] = true + } + for _, want := range []string{"name", "version", "sha256", "bundleConfigMap"} { + if !required[want] { + t.Errorf("spec.skill.required missing %q (got %v)", want, skill["required"]) + } + } + + skillProps := skill["properties"].(map[string]any) + wantPatterns := map[string]string{ + "name": "^[a-z0-9][a-z0-9-]*$", + "version": "^[A-Za-z0-9][A-Za-z0-9._-]*$", + "sha256": "^[a-f0-9]{64}$", + } + for field, want := range wantPatterns { + fp, ok := skillProps[field].(map[string]any) + if !ok { + t.Errorf("spec.skill.%s property missing", field) + continue + } + if fp["pattern"] != want { + t.Errorf("spec.skill.%s.pattern = %v, want %s", field, fp["pattern"], want) + } + } + + wantMaxLen := map[string]int{ + "name": 64, + "version": 64, + "bundleConfigMap": 253, + "displayName": 128, + "description": 1024, + } + for field, want := range wantMaxLen { + fp, ok := skillProps[field].(map[string]any) + if !ok { + t.Errorf("spec.skill.%s property missing", field) + continue + } + if fp["maxLength"] != want { + t.Errorf("spec.skill.%s.maxLength = %v, want %d", field, fp["maxLength"], want) + } + } + + // Spec-level CEL: spec.skill is required when type=skill. 
+ rules, ok := spec["x-kubernetes-validations"].([]any) + if !ok { + t.Fatal("spec.x-kubernetes-validations missing") + } + joined := "" + for _, r := range rules { + joined += r.(map[string]any)["rule"].(string) + "\n" + } + if !strings.Contains(joined, "self.type != 'skill' || has(self.skill)") { + t.Errorf("spec CEL rules missing skill requirement; got:\n%s", joined) + } +} + func TestRegistrationRequestCRD_Parses(t *testing.T) { data, err := ReadInfrastructureFile("base/templates/registrationrequest-crd.yaml") if err != nil { @@ -1099,6 +1196,94 @@ func assertAgentRBACRulesTight(t *testing.T, roleName string, role map[string]an } } +// TestSkillPublishRBAC_NamespaceScopedConfigMapsOnly pins the shape of the +// skill-bundle publish grant. The agent self-publish path (`obol sell +// skill` from inside the mother agent) needs to write the bundle ConfigMap +// next to its ServiceOffer, but that grant must stay a NAMESPACED Role in +// hermes-obol-agent — a core/configmaps write on the cluster-wide +// openclaw-monetize-write ClusterRole would hand every agent write access +// to every namespace's ConfigMaps (LiteLLM config, x402 pricing, buyer +// auth pools) and is hard-failed by assertAgentRBACRulesTight above. 
+func TestSkillPublishRBAC_NamespaceScopedConfigMapsOnly(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/obol-agent-monetize-rbac.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + docs := multiDoc(data) + + role := findDocByName(docs, "Role", "hermes-skill-publish") + if role == nil { + t.Fatal("no Role 'hermes-skill-publish' found (must be a namespaced Role, not a ClusterRole)") + } + if findDocByName(docs, "ClusterRole", "hermes-skill-publish") != nil { + t.Fatal("hermes-skill-publish must not exist as a ClusterRole") + } + if ns := nested(role, "metadata", "namespace"); ns != "hermes-obol-agent" { + t.Errorf("Role namespace = %v, want hermes-obol-agent", ns) + } + + rules, ok := role["rules"].([]any) + if !ok || len(rules) != 1 { + t.Fatalf("hermes-skill-publish must carry exactly one rule, got %v", role["rules"]) + } + rule, ok := rules[0].(map[string]any) + if !ok { + t.Fatalf("malformed rule: %T", rules[0]) + } + + groups := stringSet(rule["apiGroups"]) + if len(groups) != 1 || !groups[""] { + t.Errorf("apiGroups = %v, want exactly [\"\"]", groups) + } + resources := stringSet(rule["resources"]) + if len(resources) != 1 || !resources["configmaps"] { + t.Errorf("resources = %v, want exactly [configmaps]", resources) + } + + verbs := stringSet(rule["verbs"]) + for _, want := range []string{"create", "get", "update", "patch"} { + if !verbs[want] { + t.Errorf("verbs missing %q: %v", want, verbs) + } + } + for _, banned := range []string{"list", "watch", "delete", "deletecollection", "*"} { + if verbs[banned] { + t.Errorf("verbs must not include %q: %v", banned, verbs) + } + } + if len(verbs) != 4 { + t.Errorf("verbs = %v, want exactly {create,get,update,patch}", verbs) + } + + // Never any secrets in this Role, under any rule shape. 
+ for _, r := range rules { + rm, _ := r.(map[string]any) + if stringSet(rm["resources"])["secrets"] { + t.Error("hermes-skill-publish must never grant secrets access") + } + } + + binding := findDocByName(docs, "RoleBinding", "hermes-skill-publish-binding") + if binding == nil { + t.Fatal("no RoleBinding 'hermes-skill-publish-binding' found") + } + if ns := nested(binding, "metadata", "namespace"); ns != "hermes-obol-agent" { + t.Errorf("RoleBinding namespace = %v, want hermes-obol-agent", ns) + } + if ref := nested(binding, "roleRef", "kind"); ref != "Role" { + t.Errorf("roleRef.kind = %v, want Role", ref) + } + if ref := nested(binding, "roleRef", "name"); ref != "hermes-skill-publish" { + t.Errorf("roleRef.name = %v, want hermes-skill-publish", ref) + } + if !bindingHasSubject(binding, "hermes", "hermes-obol-agent") { + t.Error("binding missing hermes-obol-agent/hermes subject") + } + if bindingHasSubject(binding, "openclaw", "openclaw-obol-agent") { + t.Error("binding must not include the openclaw subject — the grant is hermes mother ns only") + } +} + func stringSet(v any) map[string]bool { out := make(map[string]bool) @@ -1224,6 +1409,7 @@ func TestAdmissionPolicy_Parses(t *testing.T) { "ForwardAuth middlewares must target x402-verifier.x402.svc", "Agent-created namespaces must be factory-owned agent-* namespaces", "Agent-created Secrets must be hermes-env or hermes-profile-seed inside agent-* namespaces", + "Agent-written ConfigMaps must be *-skill-bundle skill bundles (hermes-config and other operator ConfigMaps are off-limits)", "Agent-created Agent CRs must be Hermes agents in their matching agent-* namespace", } if len(validations) != len(wantMessages) { @@ -1262,3 +1448,82 @@ func TestAdmissionPolicy_Parses(t *testing.T) { t.Errorf("validationActions[0] = %v, want Deny", actions[0]) } } + +func TestServiceOfferCRD_CardPayment(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/serviceoffer-crd.yaml") + if err != nil { + 
t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CRD document found") + } + + versions := nested(crd, "spec", "versions").([]any) + v0 := versions[0].(map[string]any) + payment := nested(v0, "schema", "openAPIV3Schema", "properties", "spec", + "properties", "payment").(map[string]any) + props := payment["properties"].(map[string]any) + + // method discriminator: enum crypto;card, default crypto. + method, ok := props["method"].(map[string]any) + if !ok { + t.Fatal("payment.method property missing") + } + if method["default"] != "crypto" { + t.Errorf("payment.method.default = %v, want crypto", method["default"]) + } + gotEnum := map[string]bool{} + for _, e := range method["enum"].([]any) { + gotEnum[e.(string)] = true + } + if !gotEnum["crypto"] || !gotEnum["card"] { + t.Errorf("payment.method.enum = %v, want crypto+card", method["enum"]) + } + + // card block: account pattern + provider enum. + card, ok := props["card"].(map[string]any) + if !ok { + t.Fatal("payment.card property missing") + } + cardProps := card["properties"].(map[string]any) + account := cardProps["account"].(map[string]any) + if account["pattern"] != "^acct_[A-Za-z0-9]+$" { + t.Errorf("payment.card.account.pattern = %v, want ^acct_[A-Za-z0-9]+$", account["pattern"]) + } + provider := cardProps["provider"].(map[string]any) + provEnum := map[string]bool{} + for _, e := range provider["enum"].([]any) { + provEnum[e.(string)] = true + } + if !provEnum["stripe"] { + t.Errorf("payment.card.provider.enum = %v, want stripe", provider["enum"]) + } + + // payTo must no longer be unconditionally required (card offers omit it); + // the per-method requirement is enforced by CEL instead. 
+ for _, r := range nested(payment, "required").([]any) { + if r.(string) == "payTo" || r.(string) == "network" { + t.Errorf("payment.required must not list %q (now CEL-gated by method)", r) + } + } + + // Three CEL rules: payTo-when-crypto, network-when-crypto, card.account-when-card. + rules, ok := payment["x-kubernetes-validations"].([]any) + if !ok { + t.Fatal("payment.x-kubernetes-validations missing") + } + if len(rules) != 3 { + t.Fatalf("payment x-kubernetes-validations count = %d, want 3", len(rules)) + } + joined := "" + for _, r := range rules { + joined += r.(map[string]any)["rule"].(string) + "\n" + } + for _, want := range []string{"self.payTo", "self.network", "self.card.account"} { + if !strings.Contains(joined, want) { + t.Errorf("CEL rules missing reference to %q; got:\n%s", want, joined) + } + } +} diff --git a/internal/embed/embed_servicebounty_crd_test.go b/internal/embed/embed_servicebounty_crd_test.go new file mode 100644 index 00000000..a26100ba --- /dev/null +++ b/internal/embed/embed_servicebounty_crd_test.go @@ -0,0 +1,103 @@ +package embed + +import "testing" + +// ───────────────────────────────────────────────────────────────────────────── +// ServiceBounty CRD tests +// ───────────────────────────────────────────────────────────────────────────── + +func TestServiceBountyCRD_Parses(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/servicebounty-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CustomResourceDefinition document found") + } + + if got := nested(crd, "metadata", "name"); got != "servicebounties.obol.org" { + t.Errorf("metadata.name = %v, want servicebounties.obol.org", got) + } + if got := nested(crd, "spec", "group"); got != "obol.org" { + t.Errorf("spec.group = %v, want obol.org", got) + } + if got := nested(crd, "spec", "names", "kind"); got != "ServiceBounty" { + 
t.Errorf("spec.names.kind = %v, want ServiceBounty", got) + } + if got := nested(crd, "spec", "scope"); got != "Namespaced" { + t.Errorf("spec.scope = %v, want Namespaced", got) + } + + short, _ := nested(crd, "spec", "names", "shortNames").([]any) + found := false + for _, s := range short { + if s == "sb" { + found = true + } + } + if !found { + t.Errorf("shortNames = %v, want it to include sb", short) + } +} + +func TestServiceBountyCRD_KeyFields(t *testing.T) { + data, err := ReadInfrastructureFile("base/templates/servicebounty-crd.yaml") + if err != nil { + t.Fatalf("ReadInfrastructureFile: %v", err) + } + + crd := findDoc(multiDoc(data), "CustomResourceDefinition") + if crd == nil { + t.Fatal("no CRD doc") + } + + versions, ok := nested(crd, "spec", "versions").([]any) + if !ok || len(versions) == 0 { + t.Fatal("spec.versions missing") + } + v0, _ := versions[0].(map[string]any) + + // status subresource present (the controller patches status). + if nested(v0, "subresources", "status") == nil { + t.Error("v1alpha1 missing status subresource") + } + + specProps := nested(v0, "schema", "openAPIV3Schema", "properties", "spec", "properties") + sp, ok := specProps.(map[string]any) + if !ok { + t.Fatal("spec.properties not an object") + } + + // spec.task.typeRef is the modular task-type anchor. + if nested(sp, "task", "properties", "typeRef") == nil { + t.Error("spec.task.typeRef missing — task-type modularity anchor") + } + + // hardwareProof enum present. + hw, _ := nested(sp, "task", "properties", "hardwareProof", "enum").([]any) + if len(hw) == 0 { + t.Error("spec.task.hardwareProof enum missing") + } + + // escrow scheme enum includes the live + future rails. 
+ scheme, _ := nested(sp, "reward", "properties", "escrow", "properties", "scheme", "enum").([]any) + var hasUpto bool + for _, s := range scheme { + if s == "upto" { + hasUpto = true + } + } + if !hasUpto { + t.Errorf("reward.escrow.scheme enum = %v, want it to include upto", scheme) + } + + // reward carries the payment envelope needed to construct the upto auth: + // the chain it settles on and the poster's refund address. + for _, f := range []string{"network", "payTo"} { + if nested(sp, "reward", "properties", f) == nil { + t.Errorf("spec.reward.%s missing — required to build the escrow authorization", f) + } + } +} diff --git a/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml b/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml new file mode 100644 index 00000000..ca5d6dde --- /dev/null +++ b/internal/embed/infrastructure/base/templates/evaluatorenrollment-crd.yaml @@ -0,0 +1,154 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + name: evaluatorenrollments.obol.org +spec: + group: obol.org + names: + kind: EvaluatorEnrollment + listKind: EvaluatorEnrollmentList + plural: evaluatorenrollments + shortNames: + - ee + singular: evaluatorenrollment + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.address + name: Address + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: EvaluatorEnrollment opts an evaluator into the eval market. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + address: + description: |- + Address is the evaluator's payout/identity address — the same address + used in eval-commit/eval-reveal annotations and bound into commitments. + pattern: ^0x[a-fA-F0-9]{40}$ + type: string + attestation: + description: |- + Attestation is the device-binding claim. v1 RECORDS it (sybil cost is + real hardware per identity once verification lands with the Secure + Enclave wiring); scheme "none" is honest-unattested. + properties: + publicKey: + description: PublicKey is the attestation public key (secure-enclave + scheme). + type: string + scheme: + description: 'Scheme: none (unattested) | secure-enclave (device-bound + P-256 key).' + enum: + - none + - secure-enclave + type: string + signature: + description: Signature is the enrollment signature over the address + (scheme-defined). + type: string + type: object + taskTypes: + description: TaskTypes this evaluator can re-run (versioned refs, + e.g. benchmark@v1). + items: + type: string + type: array + required: + - address + - taskTypes + type: object + status: + description: EvaluatorEnrollmentStatus is controller-owned ladder state. + properties: + observedGeneration: + format: int64 + type: integer + records: + description: |- + Records hold per-task-type ladder progress (reputation is per task + type — benchmark@v1 rep says nothing about finetune@v1). + items: + description: EvaluatorLadderRecord is one task type's ladder progress. 
+ properties: + completed: + description: Completed counts all settled panel seats (any tier). + format: int64 + type: integer + divergences: + description: |- + Divergences counts settled seats graded out of band (incl. non/bad + reveals) — the negative reputation signal. + format: int64 + type: integer + groundedEvals: + description: |- + GroundedEvals counts settled seats whose verdict was grounded by an + on-chain ERC-8004 validation entry. + type: integer + lastEvalAt: + description: |- + LastEvalAt is when this evaluator's most recent seat settled — the + anchor for reputation decay (decayHalfLife). + format: date-time + type: string + probationEvals: + description: |- + ProbationEvals counts paid in-band evals while on Probation (promotion + to Full at the package threshold). + format: int64 + type: integer + recentFulfillers: + description: |- + RecentFulfillers are the last few fulfiller addresses this evaluator + judged — the pair-diversity rule down-weights repeat pairings. + items: + type: string + type: array + shadowAgreements: + description: |- + ShadowAgreements counts shadow verdicts within tolerance of the quorum + median (promotion to Probation at the task package's threshold). + format: int64 + type: integer + taskType: + type: string + tier: + description: 'Tier: Shadow | Probation | Full. New enrollments + start Shadow.' 
+ type: string + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/embed/infrastructure/base/templates/obol-agent-admission-policy.yaml b/internal/embed/infrastructure/base/templates/obol-agent-admission-policy.yaml index 98723b4d..fb5e5697 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent-admission-policy.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent-admission-policy.yaml @@ -28,7 +28,7 @@ spec: operations: ["CREATE", "UPDATE"] - apiGroups: [""] apiVersions: ["v1"] - resources: ["namespaces", "secrets"] + resources: ["namespaces", "secrets", "configmaps"] operations: ["CREATE", "UPDATE"] - apiGroups: ["obol.org"] apiVersions: ["*"] @@ -46,6 +46,8 @@ spec: message: "Agent-created namespaces must be factory-owned agent-* namespaces" - expression: 'object.kind != "Secret" || (object.metadata.namespace.startsWith("agent-") && (object.metadata.name == "hermes-env" || object.metadata.name == "hermes-profile-seed"))' message: "Agent-created Secrets must be hermes-env or hermes-profile-seed inside agent-* namespaces" + - expression: 'object.kind != "ConfigMap" || object.metadata.name.endsWith("-skill-bundle")' + message: "Agent-written ConfigMaps must be *-skill-bundle skill bundles (hermes-config and other operator ConfigMaps are off-limits)" - expression: 'object.kind != "Agent" || (object.metadata.namespace == "agent-" + object.metadata.name && (!has(object.spec.runtime) || object.spec.runtime == "hermes"))' message: "Agent-created Agent CRs must be Hermes agents in their matching agent-* namespace" diff --git a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml index bf2890af..afafd8aa 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml +++ 
b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml @@ -156,3 +156,76 @@ subjects: - kind: ServiceAccount name: openclaw namespace: openclaw-obol-agent + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: hermes-skill-publish + namespace: hermes-obol-agent +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create", "get", "update", "patch"] + +--- +#------------------------------------------------------------------------------ +# Role (NAMESPACED) - ServiceBounty demand-side write, hermes mother ns only +# +# Deliberately NOT added to the cluster-wide openclaw-monetize-write +# ClusterRole: that would hand every agent write access to every namespace's +# bounties (and therefore their claim/submit/verdict annotation channel and +# escrow lifecycle). The poster agent only ever needs to manage bounties in +# its own namespace. See plans/bounty-ane-marketplace-design.md (review fix #2). +#------------------------------------------------------------------------------ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: hermes-bounty-write + namespace: hermes-obol-agent +rules: + - apiGroups: ["obol.org"] + resources: ["servicebounties"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["obol.org"] + resources: ["servicebounties/status"] + verbs: ["get"] + # Evaluator enrollment is namespaced for the same reason bounty writes are: + # an agent manages its OWN enrollment, never the cluster's pool. Ladder + # state (status) stays controller-owned — read-only here. 
+ - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments/status"] + verbs: ["get"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: hermes-skill-publish-binding + namespace: hermes-obol-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: hermes-skill-publish +subjects: + - kind: ServiceAccount + name: hermes + namespace: hermes-obol-agent + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: hermes-bounty-write-binding + namespace: hermes-obol-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: hermes-bounty-write +subjects: + - kind: ServiceAccount + name: hermes + namespace: hermes-obol-agent diff --git a/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml b/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml new file mode 100644 index 00000000..31a1472a --- /dev/null +++ b/internal/embed/infrastructure/base/templates/servicebounty-crd.yaml @@ -0,0 +1,645 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + name: servicebounties.obol.org +spec: + group: obol.org + names: + kind: ServiceBounty + listKind: ServiceBountyList + plural: servicebounties + shortNames: + - sb + singular: servicebounty + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.task.typeRef + name: Task + type: string + - jsonPath: .spec.reward.amount + name: Reward + type: string + - jsonPath: .spec.eval.mode + name: Verification + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServiceBounty declares a unit of paid work 
(benchmark, fine-tune, serve, …) + with an escrowed reward released on an accepted verdict. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + acceptance: + description: |- + Acceptance is how a submission is judged. Defaults come from the task + type; the poster may tighten them. + properties: + commitReveal: + description: |- + CommitReveal requires evaluators to commit then reveal scores, so they + can't pre-agree on a number. + type: boolean + method: + description: |- + Method judges a submission. Benchmarks are NOT bit-exact: rerun-tolerance + re-runs the harness and accepts a score within tolerance. The commitHash + is integrity (anti bait-and-switch), not a determinism gate. + enum: + - rerun-tolerance + - harness-rerun + - sla-probe + - poster-manual + type: string + tolerance: + additionalProperties: + type: string + description: Tolerance per metric (e.g. {"mmlu":"0.01"}). Default + from the task type. + type: object + type: object + deadline: + description: 'Deadline: past it with no accepted verdict → Expired + → Refunded.' + format: date-time + type: string + eval: + description: |- + Eval configures the OBOL-paid evaluation market (a SEPARATE payment leg + from the reward — x402 cannot splice a fee out of the reward auth). 
+ properties: + k: + default: 1 + description: |- + K evaluators: median-of-k quorum; k≥3 whenever a probation seat is + occupied (the median absorbs one outlier). + format: int64 + type: integer + mode: + default: required + description: |- + Mode gates verification. 'required' (default) routes acceptance through + the evaluator quorum once the eval market is wired — until then a poster + verdict is recorded as PosterOverride. 'dangerouslySkipped' declares + poster-as-judge up front: same override path, but the bounty is marked + unverified and produces no reputation signal. + enum: + - required + - dangerouslySkipped + type: string + payment: + description: Payment for evaluators — a separate leg from the + reward. + properties: + asset: + default: OBOL + description: Asset defaults to OBOL (verification is an OBOL + utility sink). + type: string + fundedBy: + default: poster + description: 'FundedBy: ''poster'' (separate poster-funded + eval budget).' + type: string + perEvaluator: + description: PerEvaluator fee (human units). + type: string + settle: + default: batch-settlement + description: 'Settle: ''batch-settlement'' pays all K evaluators + in one tx.' + type: string + type: object + selection: + description: |- + Selection: VRF-sampled after submission, reputation-weighted; the poster + cannot hand-pick. + enum: + - vrf-reputation-weighted + - poster-manual + type: string + type: object + maxFulfillers: + default: 1 + description: 'MaxFulfillers: 1 = single-winner (default); >1 = first-N-valid + paid.' + format: int64 + type: integer + reward: + description: Reward is the escrowed payment released to the fulfiller + on acceptance. + properties: + amount: + description: Amount is the lump-sum reward (human units, e.g. + "500.00"). + type: string + asset: + description: Asset reuses ServiceOffer's asset shape (USDC eip3009 + / OBOL permit2). + properties: + address: + description: ERC-20 contract address. 
+ pattern: ^0x[0-9a-fA-F]{40}$ + type: string + decimals: + description: Token decimals in atomic units. + format: int64 + maximum: 255 + minimum: 0 + type: integer + eip712Name: + description: EIP-712 domain name used by the token. + type: string + eip712Version: + description: EIP-712 domain version used by the token. + type: string + symbol: + description: Human-friendly token symbol (e.g. USDC, OBOL). + type: string + transferMethod: + description: x402 transfer method for the asset. + enum: + - eip3009 + - permit2 + type: string + type: object + escrow: + description: Escrow selects the x402 settlement rail + reputation-driven + mode. + properties: + facilitator: + description: |- + Facilitator URL (our own facilitator acts as the bounded settlement + trigger; payTo is signed into the auth so it can never redirect funds). + type: string + mode: + description: |- + Mode is selected by the fulfiller's reputation: 'auto' (optimistic), + 'facilitator-check' (deterministic re-run), 'onchain-lock' (authCapture). + enum: + - auto + - facilitator-check + - onchain-lock + type: string + scheme: + description: |- + Scheme: 'upto' (live: facilitator holds a recipient-bound auth, settles + ≤ max) or 'authCapture' (funds-locked, used above valueCap once the Go + impl lands — x402-foundation/x402#2298). + enum: + - upto + - authCapture + type: string + valueCapMicros: + description: 'ValueCapMicros: above this the escrow must use + an on-chain lock.' + type: string + type: object + network: + description: Payment network (e.g. "base", "base-sepolia"). + type: string + payTo: + description: |- + PayTo is the poster's address: the escrow-return / refund destination. + The fulfiller payout address is bound at claim time (witness.to in the + upto auth), not here. + pattern: ^0x[a-fA-F0-9]{40}$ + type: string + type: object + task: + description: |- + Task describes the work. 
spec.task.typeRef selects an embedded, + versioned task-type package; spec.task.params is validated against + that package's schema at admission. + properties: + datasetCommit: + description: |- + DatasetCommit pins the eval dataset (committed root + the fraction kept + private so a public re-run can't leak answers / enable train-on-test). + properties: + privateFraction: + description: |- + PrivateFraction (0..1, as a string to keep schema stable) of rows kept + secret and revealed only to sampled evaluators at eval time. + type: string + root: + description: Root is a Merkle root committing the (partially + private) eval dataset. + type: string + type: object + hardwareProof: + description: |- + HardwareProof strength required of the fulfiller. self-report is a + reputation-backed claim (forgeable); gpu-attestation is cryptographic + (NVIDIA CC / enclave-binding); evaluator-measured moves the throughput + measurement onto attested evaluator hardware. + enum: + - self-report + - gpu-attestation + - evaluator-measured + type: string + params: + additionalProperties: + type: string + description: Free-form knobs validated against the task type's + param schema. + type: object + targetModel: + description: Target model metadata (reuses ServiceOffer's model + shape). + properties: + name: + description: Model identifier (e.g. qwen3.5:35b). + type: string + runtime: + description: Runtime serving the model. + enum: + - ollama + - vllm + - tgi + type: string + required: + - name + - runtime + type: object + typeRef: + description: TypeRef resolves an embedded task-type package, e.g. + "benchmark@v1". + type: string + required: + - typeRef + type: object + trust: + description: |- + Trust selects the reputation gate + optional refundable self-bond. No + validator stake, no slashing — reputation (lost future income) is the + only collateral. 
+ properties: + reputationGate: + description: |- + ReputationGate derives the fulfiller's maxBountyValue from ERC-8004 + getSummary (read with a curated, trusted client filter). + type: boolean + selfBond: + description: |- + SelfBond is an OPTIONAL refundable bond the fulfiller posts from their + OWN funds (returned on success). It is never slashed to a validator set. + properties: + amount: + type: string + required: + type: boolean + token: + type: string + type: object + type: object + required: + - reward + - task + type: object + status: + description: |- + ServiceBountyStatus mirrors the AND-rollup condition idiom used by + ServiceOffer. Machine truth is the condition set; Phase is the human rollup. + properties: + bondState: + description: |- + BondState tracks the fulfiller self-bond at the escrow gateway: + Reserved | Returned (success/honest timeout) | Forfeited (rejected work, + offsets the poster's burned eval budget). + type: string + captureTxHash: + description: CaptureTxHash / RefundTxHash record the settled reward + or refund. + type: string + claims: + description: |- + Claims are observed fulfiller bindings (single-winner is the common case, + so claims live in status, not a separate CR). + items: + properties: + claimedAt: + format: date-time + type: string + commitHash: + description: |- + CommitHash binds the worker to a specific model + outputs (anti + bait-and-switch), revealed at submit. + type: string + fulfillerAddress: + type: string + phase: + description: 'Phase: Claimed | Submitted | Verified | Rejected.' + type: string + type: object + type: array + conditions: + items: + properties: + lastTransitionTime: + description: Last time the condition transitioned. + format: date-time + type: string + message: + description: Human-readable message with details. + type: string + reason: + description: Machine-readable reason for the condition. + type: string + status: + description: Status of the condition. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: Condition type. + type: string + required: + - status + - type + type: object + type: array + escalation: + description: |- + Escalation is the second-round eval state opened when the first-round + quorum diverges beyond the task's escalation epsilon. + properties: + budgetState: + description: |- + BudgetState tracks the escalation eval budget at the escrow gateway: + Reserved | Captured | Voided. + type: string + evaluations: + description: Evaluations are the escalation round's commit-reveal + records. + items: + description: |- + ServiceBountyEvaluation is one evaluator's commit-reveal record. WithinBand + is the per-bounty ladder bookkeeping hook: divergence from the quorum median + (or a missing/invalid reveal) is what future reputation feedback keys on. + properties: + address: + description: Address is the evaluator's payout/identity + address (annotation key suffix). + type: string + commitHash: + description: CommitHash = EvalCommitHash(score, salt, address), + promoted first-write-wins. + type: string + grounded: + description: |- + Grounded marks a verdict backed by an on-chain ERC-8004 validation + entry observed for this bounty's eval-request hash — the chain-anchored + reputation signal, as opposed to an annotation-only reveal. + type: boolean + paid: + description: |- + Paid marks inclusion in the eval-budget batch settlement (counting + seats that revealed validly; shadows evaluate free). + type: boolean + phase: + description: 'Phase: Committed | Revealed | BadReveal | + NonReveal.' + type: string + revealedAt: + description: RevealedAt records when a valid reveal was + promoted. + format: date-time + type: string + score: + description: Score is the revealed 0-100 verdict (ERC-8004 + validationResponse semantics). + format: int64 + type: integer + seat: + description: |- + Seat mirrors the panel seat kind (full | probation | shadow); empty in + open-door mode. 
+ type: string + validationTxHash: + description: |- + ValidationTxHash is the evaluator-submitted ERC-8004 validationResponse + transaction, recorded as provenance (the evaluator's OWN wallet signs; + the controller never does). + type: string + withinBand: + description: |- + WithinBand is false for NonReveal/BadReveal and for revealed scores + outside the outlier band around the quorum median. + type: boolean + type: object + type: array + panel: + description: Panel is the escalation-round seat assignment. + items: + description: ServiceBountyPanelSeat is one selected evaluator + seat. + properties: + address: + description: Address is the enrolled evaluator's address. + type: string + seat: + description: 'Seat: full | probation | shadow.' + type: string + type: object + type: array + reason: + description: Reason records why the escalation opened (e.g. quorum + divergence). + type: string + revealDeadline: + description: RevealDeadline is the escalation round's commit→reveal + cutoff. + format: date-time + type: string + round: + description: Round is the escalation round number (1 = first escalation). + type: integer + voucherDeadline: + description: VoucherDeadline is when the escalation eval-budget + voucher expires. + format: date-time + type: string + required: + - round + type: object + escrowSpender: + description: |- + EscrowSpender is the facilitator address Permit2 vouchers must name as + the only executor (Receipt.Spender echoed into status for signers). + type: string + escrowState: + description: 'EscrowState: Reserved | Captured | Voided (held auth + at the facilitator).' + type: string + evalBudgetState: + description: |- + EvalBudgetState tracks the poster-funded OBOL eval budget + (k × perEvaluator) at the escrow gateway: Reserved | Captured | Voided. + Evaluators are paid for the WORK, pass or fail. + type: string + evalPayoutTxHash: + description: EvalPayoutTxHash records the batch-settlement receipt + for the eval leg. 
+ type: string + evaluations: + description: |- + Evaluations are the eval-market verdicts promoted from the + obol.org/eval-commit- / eval-reveal- annotation channel. + items: + description: |- + ServiceBountyEvaluation is one evaluator's commit-reveal record. WithinBand + is the per-bounty ladder bookkeeping hook: divergence from the quorum median + (or a missing/invalid reveal) is what future reputation feedback keys on. + properties: + address: + description: Address is the evaluator's payout/identity address + (annotation key suffix). + type: string + commitHash: + description: CommitHash = EvalCommitHash(score, salt, address), + promoted first-write-wins. + type: string + grounded: + description: |- + Grounded marks a verdict backed by an on-chain ERC-8004 validation + entry observed for this bounty's eval-request hash — the chain-anchored + reputation signal, as opposed to an annotation-only reveal. + type: boolean + paid: + description: |- + Paid marks inclusion in the eval-budget batch settlement (counting + seats that revealed validly; shadows evaluate free). + type: boolean + phase: + description: 'Phase: Committed | Revealed | BadReveal | NonReveal.' + type: string + revealedAt: + description: RevealedAt records when a valid reveal was promoted. + format: date-time + type: string + score: + description: Score is the revealed 0-100 verdict (ERC-8004 validationResponse + semantics). + format: int64 + type: integer + seat: + description: |- + Seat mirrors the panel seat kind (full | probation | shadow); empty in + open-door mode. + type: string + validationTxHash: + description: |- + ValidationTxHash is the evaluator-submitted ERC-8004 validationResponse + transaction, recorded as provenance (the evaluator's OWN wallet signs; + the controller never does). + type: string + withinBand: + description: |- + WithinBand is false for NonReveal/BadReveal and for revealed scores + outside the outlier band around the quorum median. 
+ type: boolean + type: object + type: array + evaluatorPanel: + description: |- + EvaluatorPanel is the controller-selected seat assignment (deterministic + per-bounty sampling from enrolled evaluators). Empty panel = open-door + fallback (insufficient pool) — any address may evaluate, as in early v1. + items: + description: ServiceBountyPanelSeat is one selected evaluator seat. + properties: + address: + description: Address is the enrolled evaluator's address. + type: string + seat: + description: 'Seat: full | probation | shadow.' + type: string + type: object + type: array + ladderRecorded: + description: |- + LadderRecorded latches the one-shot cross-bounty ladder bookkeeping so + repeated reconciles after quorum never double-count. + type: boolean + observedGeneration: + format: int64 + type: integer + panelSeed: + description: |- + PanelSeed records the randomness source the evaluator panel was drawn + from, so the sampling is auditable (drand round, raw randomness, sig). + properties: + randomness: + description: Randomness is the beacon output the panel sampling + was keyed on. + type: string + round: + description: Round is the drand round the randomness came from. + format: int64 + type: integer + signature: + description: Signature is the beacon signature proving the randomness. + type: string + source: + description: Source names the randomness origin (e.g. drand, local-dev). + type: string + required: + - source + type: object + phase: + type: string + refundTxHash: + type: string + reportURI: + description: ReportURI points at the SIWx/local-gated A2UI report + (deliverable). + type: string + revealDeadline: + description: |- + RevealDeadline opens once K commitments are in: every commit closes + before any reveal opens, and a missing reveal past this instant is + graded as a worst-case outlier (nonRevealPenalty). + format: date-time + type: string + weightedScore: + description: WeightedScore is the reputation-weighted eval verdict + (0-100). 
+ format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml index 274cd539..41c7bd04 100644 --- a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml +++ b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml @@ -148,11 +148,25 @@ spec: pattern: ^/[a-zA-Z0-9/_.-]*$ type: string payment: + description: |- + ServiceOfferPayment describes how buyers pay for the offer. Two methods + are supported, selected by Method: + + - "crypto" (default): x402 on-chain stablecoin settlement. Network and + PayTo are required and PayTo must be a 0x EVM address. + - "card": an MPP credit-card method (Stripe stripe.charge). Card is + required; funds settle off-chain into the configured Stripe account + and Network/PayTo do not apply. + + The per-method required fields are enforced by the XValidation rules + below so the API server rejects malformed offers at admission time, + independent of the CLI. The CEL guards short-circuit on Method so the + 0x/account checks are only evaluated for the relevant method. properties: asset: description: |- Optional token metadata override for x402 settlement. When omitted, - the verifier uses the chain default asset. + the verifier uses the chain default asset. Crypto only. properties: address: description: ERC-20 contract address. @@ -180,24 +194,81 @@ spec: - permit2 type: string type: object + card: + description: Card payment terms. Required when method=card; ignored + otherwise. + properties: + account: + description: |- + Destination account that receives settled card funds. For Stripe this + is the connected/destination account id (e.g. "acct_1A2b3C4d5E6f7G"). + pattern: ^acct_[A-Za-z0-9]+$ + type: string + currency: + default: usd + description: ISO-4217 currency the card is charged in. Default + "usd". 
+ pattern: ^[a-z]{3}$ + type: string + networkId: + description: |- + Optional Stripe "machine payments" network id, surfaced in the 402 + challenge's extra block so MPP card clients know where to mint a + Shared Payment Token. + type: string + paymentMethodTypes: + description: |- + Accepted payment-method types advertised to the client. Defaults to + ["card"] at the gateway when empty. + items: + type: string + maxItems: 16 + type: array + provider: + default: stripe + description: |- + Card payment provider. Only "stripe" is supported today (MPP + stripe.charge via Shared Payment Tokens). + enum: + - stripe + type: string + type: object maxTimeoutSeconds: default: 300 description: 'Payment validity window in seconds (x402: maxTimeoutSeconds).' format: int64 type: integer + method: + default: crypto + description: |- + Payment method. "crypto" gates with x402 on-chain stablecoin + settlement (default; preserves existing behavior). "card" gates with + an MPP credit-card method (Stripe) that settles off-chain into + spec.payment.card.account. + enum: + - crypto + - card + type: string network: description: |- Chain identifier for payments (human-friendly). Reconciler resolves - to CAIP-2 format (e.g., "base-sepolia" → "eip155:84532"). + to CAIP-2 format (e.g., "base-sepolia" → "eip155:84532"). Required + when method=crypto (enforced by the payment XValidation rules); + unused for card payments. type: string payTo: - description: 'USDC recipient wallet address (x402: payTo).' + description: |- + USDC recipient wallet address (x402: payTo). Required and 0x-format + when method=crypto (enforced by the payment XValidation rules); + unused for card payments. pattern: ^0x[0-9a-fA-F]{40}$ type: string price: description: |- - Pricing table with per-unit prices in USDC (human-readable decimals). - Which fields are applicable depends on the workload type. + Pricing table with per-unit prices (human-readable decimals). 
For + crypto the unit is the settlement token (USDC by default); for card + the unit is payment.card.currency. Which fields are applicable + depends on the workload type. properties: perEpoch: description: Per-training-epoch price in USDC. Fine-tuning @@ -219,15 +290,23 @@ spec: type: object scheme: default: exact - description: x402 payment scheme. + description: x402 payment scheme. Only meaningful when method=crypto. enum: - exact type: string required: - - network - - payTo - price type: object + x-kubernetes-validations: + - message: payment.payTo is required when payment.method is crypto + rule: 'self.method != ''card'' ? has(self.payTo) : true' + - message: payment.network is required when payment.method is crypto + rule: 'self.method != ''card'' ? (has(self.network) && size(self.network) + > 0) : true' + - message: payment.card.account is required when payment.method is + card + rule: 'self.method == ''card'' ? (has(self.card) && has(self.card.account)) + : true' provenance: additionalProperties: type: string @@ -304,19 +383,68 @@ spec: type: string type: array type: object + skill: + description: |- + Required when type='skill' (enforced by the spec-level XValidation + rule). Describes the downloadable skill bundle being sold: identity + (name@version), integrity hash, and the ConfigMap carrying the + artifact. The controller renders a static bundle server from this + block and refuses to publish when the ConfigMap bytes do not match + sha256. + properties: + bundleConfigMap: + description: |- + Name of a ConfigMap in the offer's namespace whose + binaryData["bundle.tar.gz"] is the artifact (key: SkillBundleKey). + maxLength: 253 + type: string + description: + description: Short human-readable description for catalog surfaces. + maxLength: 1024 + type: string + displayName: + description: Human-friendly display name for catalog surfaces. + maxLength: 128 + type: string + name: + description: |- + Skill name (e.g. buy-x402). 
Combined with Version it forms the + skill ref @ used by ERC-8004 feedback tags. + maxLength: 64 + pattern: ^[a-z0-9][a-z0-9-]*$ + type: string + sha256: + description: |- + Lowercase hex sha256 of the gzipped bundle bytes (the exact bytes + stored in the bundle ConfigMap and served to buyers). + pattern: ^[a-f0-9]{64}$ + type: string + version: + description: Skill version (e.g. 0.1.0). + maxLength: 64 + pattern: ^[A-Za-z0-9][A-Za-z0-9._-]*$ + type: string + required: + - bundleConfigMap + - name + - sha256 + - version + type: object type: default: http description: |- Service type. 'inference' enables model management; 'http' for any HTTP service; 'agent' references an Agent CR via spec.agent.ref and the controller derives upstream + model + skills from the agent's status; - 'dataset' sells a versioned dataset artifact via spec.dataset. + 'dataset' sells a versioned dataset artifact via spec.dataset; + 'skill' sells a downloadable skill bundle via spec.skill. enum: - inference - fine-tuning - http - agent - dataset + - skill type: string upstream: description: In-cluster service that handles the actual workload. @@ -346,6 +474,9 @@ spec: required: - payment type: object + x-kubernetes-validations: + - message: spec.skill is required when type=skill + rule: self.type != 'skill' || has(self.skill) status: properties: agentId: diff --git a/internal/embed/infrastructure/base/templates/x402.yaml b/internal/embed/infrastructure/base/templates/x402.yaml index d3209cec..ae64671a 100644 --- a/internal/embed/infrastructure/base/templates/x402.yaml +++ b/internal/embed/infrastructure/base/templates/x402.yaml @@ -2,6 +2,9 @@ # x402 runtime components: # - x402-verifier: shared seller-owned x402 gateway (and legacy /verify endpoint) # - serviceoffer-controller: control-plane reconciler for ServiceOffer child resources +# - x402-escrow: ServiceBounty escrow facilitator (verify/hold Permit2 vouchers, +# settle permitTransferFrom on capture). 
ClusterIP-internal ONLY — never +# routed through Traefik or the tunnel. apiVersion: v1 kind: Namespace metadata: @@ -57,6 +60,12 @@ metadata: type: Opaque stringData: WALLET_ADDRESS: "" + # Stripe secret key for MPP credit-card (stripe.charge) offers. Empty on a + # crypto-only stack; populate it (e.g. `kubectl -n x402 patch secret + # x402-secrets --type merge -p '{"stringData":{"STRIPE_SECRET_KEY":"sk_live_..."}}'`) + # to let the verifier authorize/capture card PaymentIntents. See README + # "Credit-card payments (MPP)". + STRIPE_SECRET_KEY: "" --- apiVersion: v1 @@ -130,6 +139,24 @@ rules: - apiGroups: ["obol.org"] resources: ["purchaserequests/status"] verbs: ["get", "update", "patch"] + # ServiceBounty demand-side reconcile: watch + finalizer updates + status. + # The controller never CREATES bounties (posters do), and the bounty pass + # creates no routes/Middleware/Secrets — a bounty must never become ingress. + - apiGroups: ["obol.org"] + resources: ["servicebounties"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["obol.org"] + resources: ["servicebounties/status"] + verbs: ["get", "update", "patch"] + # Evaluator pool: the controller READS enrollments for panel selection and + # writes only the controller-owned ladder STATE (status). It never creates + # or deletes enrollments — evaluators do. + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments"] + verbs: ["get", "list", "watch"] + - apiGroups: ["obol.org"] + resources: ["evaluatorenrollments/status"] + verbs: ["get", "update", "patch"] - apiGroups: ["obol.org"] resources: ["agents"] verbs: ["get", "list", "watch", "update", "patch"] @@ -272,6 +299,16 @@ spec: - --config=/config/pricing.yaml - --listen=:8080 - --route-source=kube + env: + # MPP credit-card (Stripe) settlement key. Empty unless card offers + # are in use; optional=true keeps the verifier starting on + # crypto-only stacks where the key is unset. 
+ - name: STRIPE_SECRET_KEY + valueFrom: + secretKeyRef: + name: x402-secrets + key: STRIPE_SECRET_KEY + optional: true volumeMounts: - name: pricing-config mountPath: /config @@ -373,6 +410,18 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + # ServiceBounty escrow seam. The escrow endpoint + credential come + # ONLY from controller env (never from CR spec or annotations) — + # see newBountyEscrowGateway. The controller holds no keys: it is + # a bounded release trigger against the escrow facilitator. + - name: OBOL_BOUNTY_ESCROW_URL + value: "http://x402-escrow.x402.svc.cluster.local:8403" + - name: OBOL_BOUNTY_ESCROW_TOKEN + valueFrom: + secretKeyRef: + name: x402-escrow + key: token + optional: true args: [] resources: requests: @@ -386,6 +435,144 @@ spec: # 256Mi and triggered OOMKilled restart loops. memory: 512Mi +--- +# x402-escrow: the ServiceBounty escrow facilitator. Holds poster-signed +# Permit2 batch-transfer vouchers (reserve), settles permitTransferFrom +# on-chain (capture), drops holds store-only (void). +# +# Security posture: +# - ClusterIP-internal ONLY. No Traefik HTTPRoute, no Middleware, no tunnel +# exposure — the only callers are the serviceoffer-controller (via +# OBOL_BOUNTY_ESCROW_URL) and in-cluster operators. +# - The serviceoffer-controller never signs and holds no keys; the escrow +# settlement key/credentials live ONLY in the optional `x402-escrow` +# Secret consumed here (keys: `token` = bearer auth for POST /escrow/*, +# `key` = hex settlement private key). Both are optional: with no token +# the routes are unauthenticated (dev); with no key, capture returns 503 +# while reserve/void keep working. +# - State is an emptyDir: escrow entries are vouchers + receipts, and a +# voucher lost PRE-capture only means the poster re-attaches it (reserve +# is idempotent and re-runs from the obol.org/*-voucher annotations). 
+# Captured receipts are also recorded on-chain, so replay after pod churn +# re-converges from the chain + annotation channel. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: x402-escrow + namespace: x402 + labels: + app: x402-escrow +spec: + # Single replica: the file-backed store serializes per-id operations + # in-process; multiple replicas would race reserve/capture on the same id. + replicas: 1 + selector: + matchLabels: + app: x402-escrow + template: + metadata: + labels: + app: x402-escrow + spec: + # PSS Restricted: pod-level identity, same posture as the verifier. + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault + containers: + - name: escrow + # Pinned like the sibling images in this file. The publish workflow + # adds the @sha256 digest on the first image-bump PR (Renovate); + # under OBOL_DEVELOPMENT the pin is rewritten to the local dev tag + # (internal/defaults.rewriteDevDigestPins, lockstep with + # internal/stack.baseLocalImages). + image: ghcr.io/obolnetwork/x402-escrow:04bebbc + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + ports: + - name: http + containerPort: 8403 + protocol: TCP + args: + - --listen=:8403 + env: + # Bearer token for POST /escrow/* (reserve/capture/void). Empty = + # unauthenticated (dev-only; the controller gateway symmetrically + # omits the Authorization header when its token is empty). + - name: OBOL_ESCROW_TOKEN + valueFrom: + secretKeyRef: + name: x402-escrow + key: token + optional: true + # Hex settlement key (the Permit2 voucher spender). Optional: + # without it capture returns 503 and vouchers cannot be + # spender-bound, but voucher-less reserve/void still work. 
+ - name: OBOL_ESCROW_KEY + valueFrom: + secretKeyRef: + name: x402-escrow + key: key + optional: true + - name: OBOL_ESCROW_RPC_BASE + value: "http://erpc.erpc.svc.cluster.local/rpc" + # File-backed JSON store on an emptyDir: losing it pre-capture + # only means re-attaching the voucher (see header comment). + - name: OBOL_ESCROW_STATE_DIR + value: "/data" + volumeMounts: + - name: escrow-state + mountPath: /data + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 25m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi + volumes: + - name: escrow-state + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: x402-escrow + namespace: x402 + labels: + app: x402-escrow +spec: + type: ClusterIP + selector: + app: x402-escrow + ports: + - name: http + port: 8403 + targetPort: http + protocol: TCP + --- apiVersion: v1 kind: Service diff --git a/internal/embed/k3s-config.yaml b/internal/embed/k3s-config.yaml index be8e7b77..e9a1af0c 100644 --- a/internal/embed/k3s-config.yaml +++ b/internal/embed/k3s-config.yaml @@ -13,11 +13,15 @@ data-dir: {{DATA_DIR}}/k3s bind-address: 0.0.0.0 https-listen-port: 6443 -# TLS SANs for local access +# TLS SANs for local + LAN access. k3s already auto-adds the node's primary +# IP, but listing it (and the hostname) explicitly lets worker nodes join via +# either address and keeps the API cert deterministic across restarts. tls-san: - "127.0.0.1" - "localhost" - "obol.stack" + - "{{NODE_IP}}" + - "{{NODE_HOSTNAME}}" # Relax eviction thresholds: k3s reports imagefs capacity as 0 on shared # filesystems, causing spurious disk-pressure taints with percentage thresholds. 
diff --git a/internal/embed/skills/dataset-anonymize/SKILL.md b/internal/embed/skills/dataset-anonymize/SKILL.md new file mode 100644 index 00000000..d811cdc7 --- /dev/null +++ b/internal/embed/skills/dataset-anonymize/SKILL.md @@ -0,0 +1,63 @@ +--- +name: dataset-anonymize +description: Anonymize a dataset's JSONL (PII detection + masking) before publishing or selling it with `obol dataset`. Pluggable detector — built-in regex redactor by default, or a BYO Hugging Face token-classification model. +--- + +# dataset-anonymize + +Strip personally-identifying information from a training dataset **before** it +is published or sold. Runs as the last privacy stage over the export bundle's +`*.jsonl` artifact, replacing detected PII spans with typed placeholders +(`<EMAIL_1>`, `<IPV4_1>`, …) so the bytes that leave the host carry no raw +secrets. + +This is a **pluggable** stage: + +- **Default (no setup):** a built-in, dependency-free regex redactor covers the + common high-signal categories (emails, IPs, credit-card / IBAN-shaped + numbers, US-SSN-shaped numbers, bearer/API-key-shaped tokens, private keys, + absolute home paths, phone numbers). +- **ML-grade (opt-in):** set `OBOL_ANONYMIZER_MODEL` to any Hugging Face + token-classification PII model and the script runs it via + `transformers.pipeline("token-classification", …)`, unioning its spans with + the regex pass. The model cache lands under the obol data dir (see below) so + it survives across runs and is never re-downloaded per invocation. + +The detector is replaceable by design: a stricter custom detector is "implement +a `detect(text) -> spans` and register it," not "edit a config row" — author a +sibling script with the same input/output contract and run it in place of +`scripts/anonymize.py` (the bundled script exposes no `--detector` flag). 
#!/usr/bin/env python3
"""Anonymize a dataset JSONL before publishing/selling it.

Replaces PII spans in the common chat/instruction text fields with typed,
deterministically-indexed placeholders (<EMAIL_1>, <IPV4_1>, ...).

Default: a dependency-free regex redactor. Opt-in ML detection: set
OBOL_ANONYMIZER_MODEL to a Hugging Face token-classification PII model.

Usage:
    anonymize.py <input.jsonl> <output.jsonl> [--model ID] [--report]
"""
import argparse
import json
import os
import re
import sys

# Target string fields walked in each record (chat + instruction shapes).
TEXT_FIELDS = ("content", "text", "input", "output", "completion", "prompt", "response")

# High-signal structured PII. Order matters: when two matches overlap, the
# merged span takes the label that is listed first here (see resolve_spans).
REGEX = [
    ("PRIVATE_KEY", re.compile(r"-----BEGIN[ A-Z]*PRIVATE KEY-----.*?-----END[ A-Z]*PRIVATE KEY-----", re.S)),
    ("ETH_KEY", re.compile(r"\b0x[0-9a-fA-F]{64}\b")),
    ("EMAIL", re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b")),
    ("AWS_KEY", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    ("GH_TOKEN", re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,}\b")),
    ("OPENAI_KEY", re.compile(r"\bsk-[A-Za-z0-9_\-]{16,}\b")),
    ("BEARER", re.compile(r"(?i)\bbearer\s+[A-Za-z0-9._\-]{16,}\b")),
    ("SSN", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
    ("CREDIT_CARD", re.compile(r"\b(?:\d[ -]?){13,19}\b")),
    ("IPV4", re.compile(r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b")),
    ("PHONE", re.compile(r"\b\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b")),
    ("HOME_PATH", re.compile(r"(?:/Users/|/home/)[^/\s\"']+")),
]


def detect_regex(text):
    """Return (start, end, label) for every REGEX match, grouped in REGEX order."""
    spans = []
    for label, pat in REGEX:
        for m in pat.finditer(text):
            spans.append((m.start(), m.end(), label))
    return spans


def detect_model(text, pipe):
    """Return (start, end, label) spans reported by the token-classification pipe.

    Detection is best-effort: a failing pipeline call yields no spans, and a
    single malformed entity (e.g. missing offsets) is skipped without
    discarding the entities that follow it.
    """
    try:
        entities = pipe(text)
    except Exception as e:  # model failure must never crash the run
        print(f" warning: model detection failed on a span: {e}", file=sys.stderr)
        return []
    spans = []
    for ent in entities:
        try:
            label = str(ent.get("entity_group") or ent.get("entity") or "PII").upper()
            spans.append((int(ent["start"]), int(ent["end"]), label))
        except Exception as e:  # skip the bad entity, keep the rest
            print(f" warning: model detection failed on a span: {e}", file=sys.stderr)
    return spans


def resolve_spans(spans):
    """Merge overlapping spans; the earliest-listed (highest-priority) label wins.

    Overlapping spans are replaced by their union so that no part of any
    detected span is left unmasked (dropping one of two partially-overlapping
    spans would leak the uncovered tail). Priority is the position in `spans`:
    detect_regex emits matches in REGEX order and model spans are appended
    after them. Adjacent spans (end == next start) stay separate. Empty spans
    are ignored. The result is sorted by start offset.
    """
    ordered = sorted(enumerate(spans), key=lambda item: (item[1][0], item[0]))
    merged = []  # entries are [start, end, label, priority]
    for priority, (start, end, label) in ordered:
        if end <= start:
            continue
        if merged and start < merged[-1][1]:
            current = merged[-1]
            current[1] = max(current[1], end)
            if priority < current[3]:
                current[2], current[3] = label, priority
        else:
            merged.append([start, end, label, priority])
    return [(start, end, label) for start, end, label, _ in merged]


def mask(text, spans, registry, counts):
    """Replace spans with deterministic placeholders.

    `registry` maps label -> {raw value -> index} and is shared across the run
    so the same raw value always yields the same placeholder; `counts` tallies
    masked spans per label. Spans are applied right-to-left so earlier offsets
    stay valid while the text is rewritten.
    """
    for start, end, label in sorted(resolve_spans(spans), key=lambda s: s[0], reverse=True):
        raw = text[start:end]
        per = registry.setdefault(label, {})
        if raw not in per:
            per[raw] = len(per) + 1
        counts[label] = counts.get(label, 0) + 1
        text = text[:start] + f"<{label}_{per[raw]}>" + text[end:]
    return text


def anonymize_value(value, pipe, registry, counts):
    """Mask PII in a single string; non-string or empty values pass through."""
    if not isinstance(value, str) or not value:
        return value
    spans = detect_regex(value)
    if pipe is not None:
        spans += detect_model(value, pipe)
    return mask(value, spans, registry, counts)


def walk(obj, pipe, registry, counts):
    """Recursively copy `obj`, masking string values stored under TEXT_FIELDS keys.

    A TEXT_FIELDS key holding a container (e.g. a multi-part chat `content`
    list) is descended into rather than returned as-is, so nested text parts
    are masked too.
    """
    if isinstance(obj, dict):
        result = {}
        for key, value in obj.items():
            if key in TEXT_FIELDS and isinstance(value, str):
                result[key] = anonymize_value(value, pipe, registry, counts)
            else:
                result[key] = walk(value, pipe, registry, counts)
        return result
    if isinstance(obj, list):
        return [walk(v, pipe, registry, counts) for v in obj]
    return obj


def set_hf_home():
    """Point HF_HOME at the obol cache dir unless the caller already set it."""
    if os.environ.get("HF_HOME"):
        return
    base = os.environ.get("OBOL_DATA_DIR")
    if base:
        home = os.path.join(base, "cache", "huggingface")
    else:
        xdg = os.environ.get("XDG_CACHE_HOME") or os.path.expanduser("~/.cache")
        home = os.path.join(xdg, "obol", "huggingface")
    os.makedirs(home, exist_ok=True)
    os.environ["HF_HOME"] = home


def load_pipeline(model_id):
    """Build the token-classification pipeline, or None for regex-only mode."""
    if not model_id:
        return None
    set_hf_home()
    try:
        from transformers import pipeline  # type: ignore
    except Exception:
        print(" warning: transformers not installed — falling back to regex-only", file=sys.stderr)
        return None
    print(f" loading PII model {model_id} (cache: {os.environ['HF_HOME']}) …", file=sys.stderr)
    return pipeline("token-classification", model=model_id, aggregation_strategy="simple")


def main():
    """CLI entry point: read input JSONL, write the anonymized JSONL, report."""
    ap = argparse.ArgumentParser(description="Anonymize a dataset JSONL")
    ap.add_argument("input")
    ap.add_argument("output")
    ap.add_argument("--model", default=os.environ.get("OBOL_ANONYMIZER_MODEL", ""))
    ap.add_argument("--report", action="store_true")
    args = ap.parse_args()

    # Opening the output for writing truncates it, which would destroy the
    # input before a single record is read.
    if os.path.realpath(args.input) == os.path.realpath(args.output):
        raise SystemExit("input and output must be different files")

    pipe = load_pipeline(args.model)
    registry, counts, n = {}, {}, 0
    # Explicit UTF-8: output is written with ensure_ascii=False, so a
    # non-UTF-8 locale default would otherwise fail on non-ASCII text.
    with open(args.input, encoding="utf-8") as fin, open(args.output, "w", encoding="utf-8") as fout:
        for lineno, line in enumerate(fin, 1):
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError as e:
                raise SystemExit(f"{args.input}:{lineno}: invalid JSON: {e}")
            fout.write(json.dumps(walk(rec, pipe, registry, counts), ensure_ascii=False) + "\n")
            n += 1

    masked = sum(counts.values())
    print(f"anonymized {n} record(s); masked {masked} PII span(s)" + (" via regex" if pipe is None else " via model+regex"))
    if args.report:
        for label in sorted(counts):
            print(f" {label:14s} {counts[label]}")


if __name__ == "__main__":
    main()
The runner +binds each result to the exact dataset it trained on by writing the dataset's +content-address (`manifestHash`) into `run.manifest` — the provenance link from +a served/sold model back to the data that produced it. + +## Backends + +| `--backend` | Tool | Hardware | Notes | +|---|---|---|---| +| `mock` *(default)* | none | any | validates the contract + provenance with no framework; emits a deterministic stub adapter + eval. Use in CI/smoke. | +| `mlx-lora` | MLX-LM | Apple silicon | near-native chat-JSONL; native LoRA | +| `unsloth` | Unsloth | NVIDIA | fast QLoRA; on GB10 (sm_121) run eager (FA3 has no kernel) | +| `axolotl` | axolotl | multi-GPU | YAML-config; exposes grad-accum | +| `torchtune` | torchtune | NVIDIA | modular recipes; guard `torch.compile` | + +The only thing shared across real backends is "regex-extract the eval metric +from the backend CLI's output" — each backend's command + file layout is +otherwise its own. Add a backend by registering one builder function in +`BACKENDS` that returns an `(argv, metric_regex)` pair. + +## Usage + +```bash +# Contract/provenance smoke (no GPU, no framework): +python3 scripts/runner.py --backend mock \ + --dataset my-dataset-v1.jsonl --base-model qwen2.5-0.5b \ + --manifest-hash <manifestHash> --out ./run + +# Real run on a GPU box: +python3 scripts/runner.py --backend unsloth \ + --dataset my-dataset-v1.jsonl --base-model unsloth/Qwen2.5-0.5B \ + --manifest-hash <manifestHash> --lora-rank 16 --epochs 1 --out ./run + +cat ./run/run.manifest # dataset_hash == the version you bought +cat ./run/eval.json # {eval_loss, backend, base_model, records} +``` + +`run.manifest` is the exact deliverable shape the bounty `finetune@v1` task +declares (`adapter.safetensors` + `eval.json` + `run.manifest` with +`dataset_hash`), so a standalone run and a verified/bounty run stay consistent. +A `--dry-run` validates the dataset and emits the manifest without invoking the +backend. 
#!/usr/bin/env python3
"""Pluggable fine-tune backend runner.

One contract over several backends: read a dataset's sft.jsonl, run a LoRA/SFT
fine-tune, and emit adapter + eval.json + run.manifest. run.manifest binds the
result to the dataset's content-address (manifestHash) — the provenance link
from a model back to the data it was trained on.

The only thing shared across real backends is "regex-extract the eval metric
from the backend's output"; each backend otherwise has its own command + layout.

Usage:
    runner.py --backend <name> --dataset <sft.jsonl> --base-model <id>
              [--manifest-hash H] [--lora-rank N] [--epochs E] [--lr LR]
              [--out DIR] [--dry-run]
"""
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
import time

# A float()-parseable number: digits with an optional fraction and exponent.
# Unlike a bare [0-9.]+ this neither swallows a trailing sentence period
# ("1.25." would crash float()) nor truncates scientific notation
# ("3.2e-05" would be read as 3.2).
_NUM = r"((?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)"
_VAL_LOSS_RE = re.compile(r"[Vv]al loss[:\s]+" + _NUM)
_EVAL_LOSS_RE = re.compile(r"eval_loss['\"]?\s*[:=]\s*" + _NUM)


def file_sha256(path):
    """Return the hex sha256 of the file at `path`, read in 1 MiB chunks."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()


def count_records(path):
    """Count non-blank lines of a JSONL file, validating each one as JSON.

    Raises ValueError naming the file and line number of the first invalid
    record.
    """
    n = 0
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, 1):
            if not line.strip():
                continue
            try:
                json.loads(line)  # validate JSONL
            except json.JSONDecodeError as e:
                raise ValueError(f"{path}:{lineno}: invalid JSON: {e}") from e
            n += 1
    return n


# --- backend command builders: (argv, eval_loss_regex) ---

def build_mlx(ds, base, hp, out):
    """Command + metric regex for MLX-LM LoRA.

    Passes the dataset's directory (not the file) as --data.
    NOTE(review): presumably mlx_lm.lora looks for its own file names inside
    that directory — confirm the dataset is laid out accordingly.
    """
    argv = ["mlx_lm.lora", "--model", base, "--train", "--data", os.path.dirname(ds) or ".",
            "--iters", str(hp["epochs"] * 100), "--adapter-path", out]
    return argv, _VAL_LOSS_RE


def build_unsloth(ds, base, hp, out):
    """Command + metric regex for the Unsloth CLI."""
    argv = [sys.executable, "-m", "unsloth.cli", "--model", base, "--dataset", ds,
            "--lora-rank", str(hp["lora_rank"]), "--epochs", str(hp["epochs"]),
            "--lr", str(hp["lr"]), "--output", out]
    return argv, _EVAL_LOSS_RE


def build_axolotl(ds, base, hp, out):
    """Command + metric regex for axolotl.

    NOTE(review): nothing in this script writes <out>/axolotl.yaml; it is
    assumed to exist before the run — confirm.
    """
    argv = ["accelerate", "launch", "-m", "axolotl.cli.train", os.path.join(out, "axolotl.yaml")]
    return argv, _EVAL_LOSS_RE


def build_torchtune(ds, base, hp, out):
    """Command + metric regex for torchtune.

    NOTE(review): nothing in this script writes <out>/tune.yaml; it is
    assumed to exist before the run — confirm.
    """
    argv = ["tune", "run", "lora_finetune_single_device", "--config", os.path.join(out, "tune.yaml")]
    return argv, _EVAL_LOSS_RE


BACKENDS = {
    "mlx-lora": build_mlx,
    "unsloth": build_unsloth,
    "axolotl": build_axolotl,
    "torchtune": build_torchtune,
}


def _last_metric(metric_re, text):
    """Return the last metric matched in `text` as a float, or None if absent."""
    found = metric_re.findall(text)
    return float(found[-1]) if found else None  # last match wins (final eval)


def run_real(backend, ds, base, hp, out):
    """Run a real backend CLI and return the final eval metric it printed.

    Exits with a message when the backend executable is missing, exits
    non-zero, or prints no recognizable metric on stdout/stderr.
    """
    build = BACKENDS[backend]
    argv, metric_re = build(ds, base, hp, out)
    print(f" $ {' '.join(argv)}", file=sys.stderr)
    try:
        proc = subprocess.run(argv, capture_output=True, text=True)
    except FileNotFoundError:
        raise SystemExit(f"backend {backend}: command not found: {argv[0]}")
    sys.stderr.write(proc.stderr[-2000:])
    if proc.returncode != 0:
        raise SystemExit(f"backend {backend} exited {proc.returncode}")
    metric = _last_metric(metric_re, proc.stdout + "\n" + proc.stderr)
    if metric is None:
        raise SystemExit(f"backend {backend} produced no eval metric in its output")
    return metric


def run_mock(ds, base, hp, out, records):
    """Framework-free stand-in: write a stub adapter and return a fake eval_loss."""
    # Deterministic, framework-free: synthesize a plausible eval_loss from the
    # data + hyperparams so the contract + provenance can be validated anywhere.
    seed = int(file_sha256(ds)[:8], 16)
    # Clamp at zero: a large --epochs would otherwise yield a negative loss.
    eval_loss = round(max(0.0, 1.5 + (seed % 1000) / 2000.0 - 0.02 * hp["epochs"]), 6)
    with open(os.path.join(out, "adapter.safetensors"), "wb") as f:
        f.write(b"OBOL-MOCK-ADAPTER\x00" + ds.encode() + b"\x00" + base.encode())
    print(f" mock backend: {records} records, base {base} -> eval_loss {eval_loss}", file=sys.stderr)
    return eval_loss


def main():
    """CLI entry point: validate the dataset, run the backend, write outputs."""
    ap = argparse.ArgumentParser(description="Pluggable fine-tune backend runner")
    ap.add_argument("--backend", default="mock", choices=["mock"] + list(BACKENDS))
    ap.add_argument("--dataset", required=True)
    ap.add_argument("--base-model", required=True)
    ap.add_argument("--manifest-hash", default="", help="dataset content-address (manifestHash) to bind into run.manifest")
    ap.add_argument("--lora-rank", type=int, default=16)
    ap.add_argument("--epochs", type=int, default=1)
    ap.add_argument("--lr", type=float, default=2e-4)
    ap.add_argument("--out", default="./run")
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    os.makedirs(args.out, exist_ok=True)
    try:
        records = count_records(args.dataset)
    except ValueError as e:
        raise SystemExit(str(e))
    if records == 0:
        raise SystemExit("dataset is empty")
    dataset_file_hash = file_sha256(args.dataset)
    hp = {"lora_rank": args.lora_rank, "epochs": args.epochs, "lr": args.lr}

    eval_loss = None
    if args.dry_run:
        print(f" dry-run: {records} valid records, would run {args.backend}", file=sys.stderr)
    elif args.backend == "mock":
        eval_loss = run_mock(args.dataset, args.base_model, hp, args.out, records)
    else:
        eval_loss = run_real(args.backend, args.dataset, args.base_model, hp, args.out)

    eval_json = {"eval_loss": eval_loss, "backend": args.backend, "base_model": args.base_model, "records": records}
    with open(os.path.join(args.out, "eval.json"), "w", encoding="utf-8") as f:
        json.dump(eval_json, f, indent=2)

    manifest = {
        # The provenance link: this fine-tune is bound to exactly the dataset
        # version it trained on.
        "dataset_hash": args.manifest_hash or dataset_file_hash,
        "dataset_file_hash": dataset_file_hash,
        "base_model": args.base_model,
        "backend": args.backend,
        "hyperparams": hp,
        "eval_loss": eval_loss,
        "adapter": "adapter.safetensors",
        "records": records,
        "created_at": int(time.time()),
    }
    with open(os.path.join(args.out, "run.manifest"), "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)

    print(json.dumps({"out": args.out, "backend": args.backend, "eval_loss": eval_loss,
                      "dataset_hash": manifest["dataset_hash"]}))


if __name__ == "__main__":
    main()
+#### Skill Bundle (paid download of one of your skills) + +A skill directory (`SKILL.md` + scripts) can be sold as a single hash-pinned +gzipped bundle behind x402 (`type=skill` ServiceOffer): + +```bash +# --from-embedded , or --from for a custom skill +obol sell skill \ + --from-embedded \ + --skill-version 0.1.0 \ + --per-request \ + --chain base-sepolia \ + --pay-to +``` + +The CLI packs the bundle deterministically (compressed cap 900000 bytes), +pins its sha256 in the offer, and the controller serves it from a tiny +bundle server at `/services//bundle.tar.gz` (+ `/skill.json` +metadata). Buyers verify the download against the sha256 advertised in the +402 response's `extra.skill` block before AND after paying. + +When you (the agent) need to publish a skill yourself without the host CLI, +use raw K8s objects — see the `sell` skill's "Selling a Skill Bundle +(type=skill)" section. Your ConfigMap write RBAC is limited to your own +namespace, so both the bundle ConfigMap and the ServiceOffer must be +created there. + +On-chain hash pinning and ratings (`obol skills calldata set-hash` / +`obol skills calldata feedback`, tag1=`asr:skill`) only PRINT calldata — +the OPERATOR submits it with their own wallet. Never sign or submit these +transactions yourself; present the printed command and calldata to the user. + +To instead sell the skill as a live, invocable service, sell the agent that +carries it: `obol agent new --skills ` then `obol sell +agent `. `obol sell skill` sells the bundle bytes; `obol sell +agent` sells execution. 
+ #### HTTP Service (in-cluster) ```bash diff --git a/internal/embed/skills/research-program/SKILL.md b/internal/embed/skills/research-program/SKILL.md new file mode 100644 index 00000000..b923f3b9 --- /dev/null +++ b/internal/embed/skills/research-program/SKILL.md @@ -0,0 +1,87 @@ +--- +name: research-program +description: Stand up a decentralized auto-research program on the Obol Stack — publish a research ID, admit worker runners over the open internet, collect hypotheses/results in a private collective knowledge base, and distribute rewards proportional to validated impact. Wraps `obol research` + the worker runner; true to karpathy/autoresearch. +--- + +# Research Program (decentralized auto-research) + +Publish a **research ID**, let **worker runners on any machine** join over the +open internet, have them run real experiments and post results to a **collective +knowledge base private to the group**, and pay out **proportional to validated +impact**. The owner is a pure coordinator — it never runs an experiment. + +This wraps two commands: `obol research` (owner) and `scripts/worker.py` (runner). + +## Declarative model (true to autoresearch) + +A program is essentially a `TASK.md` frontmatter: an **arbitrary metric**, a +**direction**, and a **KEEP rule**. Any domain lands without a schema change. + +``` +metric val_bpb # any string: val_bpb, auc, latency_ms, ΔΔG, … +direction minimize|maximize +accept beats-champion|threshold +split by-impact|champion-takes-all +membership open|invite +``` + +Operational policy (how a runner sets up GPUs, which hypotheses to try) is +off-chain — it lives in `program.md` and in the runner, exactly as +AutoScientists keeps `LAUNCH.md` off-chain. + +## 1. 
Owner — publish the program (on your machine) + +```bash +obol research publish nanogpt-valbpb \ + --objective "Drive nanoGPT val_bpb down" \ + --metric val_bpb --direction minimize --accept beats-champion \ + --baseline 1.20 --pool 100 --token OBOL --network base-sepolia \ + --membership invite --split by-impact +``` + +This starts the KB + membership server on your machine and opens a **Cloudflare +tunnel**, printing a public URL like `https://.trycloudflare.com`. Workers on +other machines reach the KB at that URL; every KB route is gated by a member +token (the device-auth flow below), so the program stays private to the group +while being reachable over the open internet. Runs in the foreground. + +## 2. Runner — join and contribute (on each GPU machine) + +```bash +python3 worker.py --kb https://.trycloudflare.com \ + --program nanogpt-valbpb --worker spark1 --time-budget 60 +``` + +The runner prints a **join code** and waits. Default experiment is the real +karpathy/autoresearch nanoGPT loop (`uv run train.py`, parsing `val_bpb:`); pass +`--experiment ": '>"` for any other task. The +runner needs the autoresearch repo prepared once (`uv run prepare.py`). + +## 3. Owner — admit the runner (the membership decision) + +```bash +obol research approve +``` + +Only the owner can approve, so the owner alone decides who joins. With +`--membership open`, runners are auto-admitted and this step is skipped. + +## 4. Owner — watch progress and settle + +```bash +obol research status nanogpt-valbpb +``` + +Shows the roster, every submitted result, the current champion, and the +impact-proportional payout split. First-verified-wins on duplicate +improvements; payout share ∝ each accepted result's validated metric gain. + +## What's private vs. public + +- **Private to the group** (token-gated): the KB — `/task`, `/champion`, + `/results`, `/status`. A request without a valid member token for THIS + program gets 401/403. 
+- **Public** (the secret is the device code, RFC 8628): `/auth/device/code`, + `/auth/device/token`. Owner-only: `/auth/device/approve`. + +Never expose the KB as an open public route — membership is the whole point. diff --git a/internal/embed/skills/research-program/scripts/worker.py b/internal/embed/skills/research-program/scripts/worker.py new file mode 100644 index 00000000..494a6b36 --- /dev/null +++ b/internal/embed/skills/research-program/scripts/worker.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +"""Auto-research worker — the runner side of an obol decentralized research program. + +Joins a program's collective knowledge base over the open internet (the +owner's Cloudflare URL), runs one real experiment (karpathy/autoresearch +nanoGPT by default), and posts its metric back. The KB decides KEEP/REJECT +and tracks the champion; rewards are settled by the owner. + +Stdlib only (urllib/json/subprocess) so it drops onto any runner with no +install. The membership flow is RFC 8628 device-auth: print a user code, the +owner approves it, we poll for a member token, then every KB call carries it. + +Usage: + python3 worker.py --kb --program --worker \\ + [--time-budget 60] [--repo ~/autoresearch] [--experiment ""] + +Without --experiment it runs the autoresearch baseline: + cd && TIME_BUDGET override && uv run train.py → parses 'val_bpb:'. 
+""" + +import argparse +import json +import os +import re +import subprocess +import sys +import time +import urllib.error +import urllib.request + + +def _post(url, token, body, timeout=120): + data = json.dumps(body).encode() + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Content-Type", "application/json") + if token: + req.add_header("Authorization", "Bearer " + token) + with urllib.request.urlopen(req, timeout=timeout) as r: + return json.loads(r.read().decode()) + + +def _get(url, token, timeout=60): + req = urllib.request.Request(url, method="GET") + if token: + req.add_header("Authorization", "Bearer " + token) + with urllib.request.urlopen(req, timeout=timeout) as r: + return json.loads(r.read().decode()) + + +def log(msg): + print(msg, file=sys.stderr, flush=True) + + +def join(kb, program, worker): + """Device-auth: get a code, wait for owner approval, return a member token.""" + grant = _post(kb + "/auth/device/code", None, {"worker": worker}) + user_code = grant["user_code"] + interval = max(2, int(grant.get("interval", 5))) + log("") + log("=" * 52) + log(" JOIN CODE for %s: %s" % (program, user_code)) + log(" Owner runs: obol research approve %s" % user_code) + log("=" * 52) + log("") + deadline = time.time() + int(grant.get("expires_in", 900)) + while time.time() < deadline: + res = _post(kb + "/auth/device/token", None, {"device_code": grant["device_code"]}) + if res.get("status") == "authorized": + log("Admitted to %s." % program) + return res["token"] + time.sleep(interval) + raise SystemExit("join timed out waiting for owner approval") + + +# Hardware adaptation applied to train.py before running. autoresearch ships +# a FlashAttention-3 attention path (flash_attn_func) that has no kernel image +# for some GPUs (e.g. NVIDIA GB10 / Blackwell sm_121). 
train.py is the file an +# autoresearch agent edits, so swapping that one call to PyTorch-native SDPA — +# which runs on any CUDA device — is a legitimate, in-framework adaptation. +_FA3_CALL = "y = fa3.flash_attn_func(q, k, v, causal=True, window_size=window_size)" +_SDPA_CALL = ("y = torch.nn.functional.scaled_dot_product_attention(" + "q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), " + "is_causal=True, enable_gqa=True).transpose(1, 2)") + +# Model/batch fit: the default 124M model at DEVICE_BATCH_SIZE=128 (×2048 seq) +# OOM-kills on a memory-pressured GPU. Shrink to a small GPT at a small batch +# so eager training fits and finishes fast — train.py's own comment says +# "reduce if OOM". Still a real GPT producing a real val_bpb. +_TRAIN_SUBS = [ + (_FA3_CALL, _SDPA_CALL), + (" n_layer: int = 12", " n_layer: int = 4"), + (" n_embd: int = 768", " n_embd: int = 512"), + ("DEVICE_BATCH_SIZE = 128", "DEVICE_BATCH_SIZE = 8"), + # One micro-batch per optimizer step: the default 2**19 token batch means + # 32 grad-accum micro-steps per step (~35s/step eager on GB10). 2**14 = + # batch*seq, so grad_accum_steps=1 and a step is ~1s — train.py needs + # step>10 to stop, so this keeps the whole run to seconds, not minutes. + ("TOTAL_BATCH_SIZE = 2**19", "TOTAL_BATCH_SIZE = 2**14"), +] + +_PREP = r''' +import re, shutil +shutil.copy("train.py", "train.py.obolbak"); shutil.copy("prepare.py", "prepare.py.obolbak") +t = open("train.py").read() +for a, b in {subs!r}: + t = t.replace(a, b) +open("train.py", "w").write(t) +p = open("prepare.py").read() +p = re.sub(r"^TIME_BUDGET = .*", "TIME_BUDGET = {budget}", p, flags=re.M) +p = re.sub(r"^EVAL_TOKENS = .*", "EVAL_TOKENS = 131072 # shrunk for fast eager eval", p, flags=re.M) +open("prepare.py", "w").write(p) +''' + + +def run_autoresearch(repo, time_budget): + """Run the real karpathy/autoresearch training; return (val_bpb, tail). 
+ + Adapts train.py for the local GPU (FA3 → SDPA), shrinks the fixed time + budget, runs eager (FA3's absence makes torch.compile tracing moot on + these devices), then restores the originals. + """ + repo = os.path.expanduser(repo) + if not os.path.isdir(repo): + raise SystemExit("autoresearch repo not found at %s" % repo) + env = dict(os.environ) + env["PATH"] = os.path.expanduser("~/.local/bin") + ":" + env.get("PATH", "") + # Run eager (no torch.compile). On bleeding-edge GPUs (NVIDIA GB10 / + # Blackwell sm_121a) Triton/ptxas can't yet assemble inductor kernels; + # eager has no Triton dependency and just runs. The cost of eager is a + # slow final eval over EVAL_TOKENS, which we shrink below so the run + # completes quickly — both are legitimate train.py-for-this-hardware edits. + env["TORCHDYNAMO_DISABLE"] = "1" + + prep = _PREP.format(subs=_TRAIN_SUBS, budget=int(time_budget)) + cmd = ( + "cd %s && python3 -c %s && " + "uv run --no-sync python train.py; " + "mv -f train.py.obolbak train.py 2>/dev/null; mv -f prepare.py.obolbak prepare.py 2>/dev/null || true" + % (repo, _shquote(prep)) + ) + log("Running autoresearch experiment (TIME_BUDGET=%ss, GB10-adapted) …" % time_budget) + p = subprocess.run(["bash", "-lc", cmd], env=env, capture_output=True, text=True) + out = (p.stdout or "") + "\n" + (p.stderr or "") + m = re.search(r"^val_bpb:\s*([0-9.]+)", out, re.MULTILINE) + if not m: + log(out[-2000:]) + raise SystemExit("could not parse val_bpb from train.py output") + return float(m.group(1)), out[-1500:] + + +def _shquote(s): + return "'" + s.replace("'", "'\\''") + "'" + + +def run_custom(experiment, metric): + """Run an arbitrary experiment shell command; parse ': '.""" + p = subprocess.run(["bash", "-lc", experiment], capture_output=True, text=True) + out = (p.stdout or "") + "\n" + (p.stderr or "") + m = re.search(r"^%s:\s*([0-9.eE+-]+)" % re.escape(metric), out, re.MULTILINE) + if not m: + log(out[-2000:]) + raise SystemExit("could not parse '%s:' from 
experiment output" % metric) + return float(m.group(1)), out[-1500:] + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--kb", required=True, help="owner KB base URL (Cloudflare)") + ap.add_argument("--program", required=True) + ap.add_argument("--worker", required=True) + ap.add_argument("--time-budget", type=int, default=60) + ap.add_argument("--repo", default="~/autoresearch") + ap.add_argument("--experiment", default="", help="custom experiment shell cmd (else autoresearch)") + args = ap.parse_args() + + kb = args.kb.rstrip("/") + + token = join(kb, args.program, args.worker) + + task = _get(kb + "/task", token) + metric = task["program"]["criteria"]["metric"] + champ = task.get("champion") + log("Task: optimize %s (%s). Current champion: %s" % ( + metric, task["program"]["criteria"]["direction"], + ("%.6f" % champ["value"]) if champ else "none")) + + if args.experiment: + value, tail = run_custom(args.experiment, metric) + else: + value, tail = run_autoresearch(args.repo, args.time_budget) + log("Experiment %s = %.6f" % (metric, value)) + + res = _post(kb + "/results", token, {"worker": args.worker, "value": value, "output": tail}) + verdict = "KEPT (new champion)" if res.get("champion") else ("ACCEPTED" if res.get("accepted") else "rejected") + log("Submitted: %s = %.6f → %s (impact %.6f)" % (metric, value, verdict, res.get("impact", 0.0))) + # Machine-readable final line on stdout. 
+ print(json.dumps({"worker": args.worker, "metric": metric, "value": value, + "accepted": res.get("accepted"), "champion": res.get("champion"), + "impact": res.get("impact")})) + + +if __name__ == "__main__": + try: + main() + except urllib.error.HTTPError as e: + raise SystemExit("HTTP %s: %s" % (e.code, e.read().decode()[:300])) diff --git a/internal/embed/skills/sell/SKILL.md b/internal/embed/skills/sell/SKILL.md index 07e85b6d..1c7f4f57 100644 --- a/internal/embed/skills/sell/SKILL.md +++ b/internal/embed/skills/sell/SKILL.md @@ -1,6 +1,6 @@ --- name: sell -description: "Sell access to services via x402 payment gating. Create ServiceOffer CRDs that automatically health-check upstreams, create payment-gated routes, and optionally pull models and register on ERC-8004. Supports inference, HTTP, and fine-tuning service types." +description: "Sell access to services via x402 payment gating. Create ServiceOffer CRDs that automatically health-check upstreams, create payment-gated routes, and optionally pull models and register on ERC-8004. Supports inference, HTTP, fine-tuning, agent, and skill (paid skill-bundle download) service types." metadata: { "openclaw": { "emoji": "\ud83d\udcb0", "requires": { "bins": ["python3"] } } } --- @@ -12,6 +12,7 @@ Sell access to services via ServiceOffer custom resources. 
Each ServiceOffer des - Exposing a local Ollama model for paid inference - Creating payment-gated routes for any upstream service +- Selling one of your own skills as a paid, hash-pinned bundle download (`type=skill` — see "Selling a Skill Bundle" below) - Checking the status of monetized services - Listing or deleting existing service offers - Processing pending offers that haven't been fully reconciled @@ -63,6 +64,264 @@ python3 scripts/monetize.py delete my-inference --namespace llm | `process --all` | Wait for all non-Ready offers to converge | | `delete --namespace ` | Delete an offer and its owned resources | +## Selling a Skill Bundle (type=skill) + +A skill — a directory with a top-level `SKILL.md` plus optional `scripts/` +and `references/` — can itself be sold as a single downloadable, ratable +unit. A `type=skill` ServiceOffer points at a ConfigMap holding the gzipped +bundle; the `serviceoffer-controller` hash-verifies the bytes, renders a +static bundle server (`so--bundle`, busybox httpd on port 8080) in the +offer's namespace, and gates `/services//*` behind x402 like any +other offer. Buyers pay the flat `perRequest` price per download. + +Two ways to publish: + +1. **Host CLI (operator runs it)**: `obol sell skill --from ` + or `--from-embedded ` packs canonically, writes the ConfigMap + (server-side apply), and creates the offer in one shot. Prefer this when + a human is driving. +2. **Raw K8s objects (you, the agent)**: create the bundle ConfigMap and the + ServiceOffer yourself with the RBAC you already have. Documented below. + +### Where your objects must live (RBAC) + +Your ServiceAccount can CRUD `serviceoffers` cluster-wide, but ConfigMap +writes are granted ONLY in your own namespace (`hermes-obol-agent`) through +the namespaced `hermes-skill-publish` Role (verbs: create/get/update/patch — +no list, watch, or delete). The controller reads the bundle ConfigMap from +the **offer's** namespace. 
Consequence: create BOTH the ConfigMap AND the +ServiceOffer in your own namespace, side by side. + +### Packaging contract (deterministic) + +The artifact is a gzipped tar of the skill directory. The canonical packer +(`obol sell skill` / `internal/skillpkg.Pack`) normalizes: + +- A top-level `SKILL.md` is REQUIRED — a bundle without it is not a skill. +- `__pycache__/` dirs and `*.pyc` files are skipped; symlinks are rejected. +- Entries are sorted by slash-separated path; tar format is USTAR. +- File mode normalized to `0644` (`0755` when any exec bit is set on the + source); directories `0755`; mtime epoch 0; uid/gid 0; empty uname/gname. +- gzip at max compression with an empty name, mtime 0, OS byte 255. +- **Cap**: the compressed bundle must be <= 900000 bytes (`MaxSkillBundleBytes`, + enforced by the CLI and again by the controller). Trim the skill if over. +- `spec.skill.sha256` is the lowercase hex SHA-256 of the **gzipped bytes** — + it MUST equal the hash of the exact bytes stored in + `binaryData["bundle.tar.gz"]`, or the controller refuses to publish + (`BundleHashMismatch`). + +Determinism caveat: the same source tree packed by the canonical Go packer +always yields the same hash (audit-friendly, keeps an on-chain pin stable +across republish). DEFLATE output is implementation-specific, so a Python +repack of identical files produces a different — still valid — hash. The +binding contract is only ever `sha256(uploaded bytes) == spec.skill.sha256`; +whatever bytes you upload, hash those. + +### Pack + publish from inside the pod + +One self-contained script: pack (mirroring the canonical normalization), +upload the ConfigMap, create the offer. Adjust `SRC`, names, price, and +`payTo` (your wallet from `signer.py accounts`). 
+ +```python +import base64, gzip, hashlib, io, json, os, sys, tarfile + +SKILLS = os.environ.get("OBOL_SKILLS_DIR", "/data/.hermes/obol-skills") +sys.path.insert(0, os.path.join(SKILLS, "obol-stack", "scripts")) +from kube import load_sa, make_ssl_context, api_get, api_post, api_patch + +SRC = os.path.join(SKILLS, "my-skill") # must contain SKILL.md at top level +NS = "hermes-obol-agent" # YOUR namespace — see RBAC note +OFFER, VERSION = "my-skill", "0.1.0" +CM = f"{OFFER}-skill-bundle" +PAY_TO = "0xYourWalletAddress" + +# -- canonical pack ---------------------------------------------------- +if not os.path.isfile(os.path.join(SRC, "SKILL.md")): + raise SystemExit("not a skill: top-level SKILL.md missing") +paths = [] +for root, dirs, files in os.walk(SRC): + dirs[:] = sorted(d for d in dirs if d != "__pycache__") + for name in sorted(dirs + files): + p = os.path.join(root, name) + if os.path.islink(p): + raise SystemExit(f"symlink not allowed: {p}") + if not name.endswith(".pyc"): + paths.append(p) +paths.sort(key=lambda p: os.path.relpath(p, SRC).replace(os.sep, "/")) +tar_buf = io.BytesIO() +with tarfile.open(fileobj=tar_buf, mode="w", format=tarfile.USTAR_FORMAT) as tf: + for p in paths: + rel = os.path.relpath(p, SRC).replace(os.sep, "/") + info = tarfile.TarInfo(rel + "/" if os.path.isdir(p) else rel) + info.mtime = info.uid = info.gid = 0 + info.uname = info.gname = "" + if os.path.isdir(p): + info.type, info.mode = tarfile.DIRTYPE, 0o755 + tf.addfile(info) + else: + data = open(p, "rb").read() + info.size = len(data) + info.mode = 0o755 if os.stat(p).st_mode & 0o111 else 0o644 + tf.addfile(info, io.BytesIO(data)) +gz_buf = io.BytesIO() +# filename="" keeps the gzip FNAME header empty (determinism rule). 
+with gzip.GzipFile(filename="", fileobj=gz_buf, mode="wb", compresslevel=9, mtime=0) as gz: + gz.write(tar_buf.getvalue()) +bundle = gz_buf.getvalue() +if len(bundle) > 900000: + raise SystemExit(f"bundle {len(bundle)} bytes > 900000-byte cap — trim the skill") +sha = hashlib.sha256(bundle).hexdigest() +print(f"bundle: {len(bundle)} bytes sha256={sha}") + +# -- bundle ConfigMap (create, or merge-patch when it exists) ---------- +token, _ = load_sa() +ctx = make_ssl_context() +cm = {"apiVersion": "v1", "kind": "ConfigMap", + "metadata": {"name": CM, "namespace": NS}, + "binaryData": {"bundle.tar.gz": base64.b64encode(bundle).decode()}} +try: + api_get(f"/api/v1/namespaces/{NS}/configmaps/{CM}", token, ctx, quiet=True) + api_patch(f"/api/v1/namespaces/{NS}/configmaps/{CM}", cm, token, ctx) +except SystemExit: + api_post(f"/api/v1/namespaces/{NS}/configmaps", cm, token, ctx) + +# -- the ServiceOffer --------------------------------------------------- +offer = { + "apiVersion": "obol.org/v1alpha1", "kind": "ServiceOffer", + "metadata": {"name": OFFER, "namespace": NS}, + "spec": { + "type": "skill", + "skill": {"name": OFFER, "version": VERSION, "sha256": sha, + "bundleConfigMap": CM, + "displayName": "My Skill", + "description": "What the skill does, one line."}, + # Anti-spoof invariants — the controller rejects anything else: + "upstream": {"service": f"so-{OFFER}-bundle", # MUST be so--bundle + "namespace": NS, # MUST equal offer namespace + "port": 8080, # MUST be 8080 + "healthPath": "/skill.json"}, + "payment": {"scheme": "exact", "network": "base-sepolia", + "payTo": PAY_TO, "maxTimeoutSeconds": 300, + "price": {"perRequest": "0.25"}}, + "registration": {"enabled": False}, + }, +} +api_post(f"/apis/obol.org/v1alpha1/namespaces/{NS}/serviceoffers", offer, token, ctx) +print(f"ServiceOffer {NS}/{OFFER} created") +``` + +Equivalent standalone YAML (for an operator with kubectl): + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-skill-skill-bundle 
+ namespace: hermes-obol-agent +binaryData: + bundle.tar.gz: +--- +apiVersion: obol.org/v1alpha1 +kind: ServiceOffer +metadata: + name: my-skill + namespace: hermes-obol-agent +spec: + type: skill + skill: + name: my-skill # ^[a-z0-9][a-z0-9-]*$, max 64 + version: "0.1.0" # ^[A-Za-z0-9][A-Za-z0-9._-]*$, max 64 + sha256: "<64-char lowercase hex of the gzipped bundle bytes>" + bundleConfigMap: my-skill-skill-bundle + displayName: "My Skill" + description: "What the skill does, one line." + upstream: + service: so-my-skill-bundle # MUST be so--bundle + namespace: hermes-obol-agent # MUST equal the offer namespace + port: 8080 # MUST be 8080 + healthPath: /skill.json + payment: + scheme: exact + network: base-sepolia + payTo: "0xYourWalletAddress" + maxTimeoutSeconds: 300 + price: + perRequest: "0.25" + registration: + enabled: false +``` + +Note: apply the ConfigMap **server-side** (`kubectl apply --server-side`). +Client-side apply writes the whole object into the last-applied-configuration +annotation, which blows the 256KiB annotation cap for bundles over ~190KB. + +### Watch reconciliation + +```bash +python3 scripts/monetize.py status my-skill --namespace hermes-obol-agent +``` + +The usual ladder applies (ModelReady is `True/Skipped` for skills). Before +`UpstreamHealthy` can pass, the controller verifies the bundle; skill-specific +`UpstreamHealthy=False` reasons: + +| Reason | Meaning / fix | +|--------|---------------| +| `InvalidSkillUpstream` | `spec.upstream` is not the controller-rendered bundle server (`so--bundle` / offer namespace / port 8080). Fix the spec — a skill offer may only ever advertise its own bundle server. | +| `BundleMissing` | `spec.skill.bundleConfigMap` not found in the offer's namespace. Create it (controller requeues automatically). | +| `BundleTooLarge` | Compressed bytes exceed 900000. Trim the skill and republish. | +| `BundleHashMismatch` | `sha256(binaryData["bundle.tar.gz"]) != spec.skill.sha256`. 
Re-hash the exact uploaded bytes. | + +Republishing new bundle bytes + updating `spec.skill.sha256` rolls the bundle +server pod automatically (content-hash annotation). + +### What buyers see (pre-purchase integrity) + +An unpaid `GET /services//bundle.tar.gz` returns 402 with the skill +identity in `accepts[0].extra.skill`: + +```json +{"name": "my-skill", "version": "0.1.0", "sha256": "<64-hex>"} +``` + +Point buyers at that `extra.skill.sha256` BEFORE they pay: it is the same +hash the controller verified against the served bytes, so after a paid +download they verify with `sha256sum bundle.tar.gz`. Paid paths on the route: +`/services//bundle.tar.gz` (the artifact) and +`/services//skill.json` (metadata JSON: name, version, sha256, +displayName, description, offer, namespace). Each request costs one +`perRequest` payment. + +### Alternative: sell the skill as a live service instead + +If buyers should *invoke* the skill rather than download it, sell the agent +that carries it through the normal agent path — no skill-specific flag: + +```bash +obol agent new --skills # if not already created +obol sell agent --price 0.001 --chain base-sepolia +``` + +The 402 surfaces `extra.agentSkills` via the normal agent machinery. `obol +sell skill` sells the bundle bytes; `obol sell agent` sells execution. + +### On-chain integrity + rating (OPERATOR-submitted — never you) + +Skill hash pinning and ratings ride ERC-8004 with the tag convention +`tag1="asr:skill"`, `tag2="eip155::::@"`. +The obol CLI only PRINTS calldata; a human operator submits it with their +own wallet. 
The controller never signs, and **you must never sign or submit +these transactions either** — surface the commands to the user instead: + +```bash +# Pin sha256(bundle) under metadata key skill.sha256:@ +obol skills calldata set-hash --agent-id --skill my-skill@0.1.0 --bundle --chain base-sepolia + +# Rate a skill 0-100 (buyer side; self-feedback from the owner reverts on-chain) +obol skills calldata feedback --agent-id --skill my-skill@0.1.0 --value 95 --chain base-sepolia +``` + ## Reconciliation Flow The `serviceoffer-controller` drives these stages: diff --git a/internal/embed/skills/sell/references/serviceoffer-spec.md b/internal/embed/skills/sell/references/serviceoffer-spec.md index 5ead12d7..d15ae893 100644 --- a/internal/embed/skills/sell/references/serviceoffer-spec.md +++ b/internal/embed/skills/sell/references/serviceoffer-spec.md @@ -62,7 +62,8 @@ spec: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `spec.type` | string | No | `http` | Workload type: `inference`, `fine-tuning`, or `http` | +| `spec.type` | string | No | `http` | Workload type: `inference`, `fine-tuning`, `http`, `agent`, or `skill` | +| `spec.skill` | object | Required when `type=skill` | — | Skill bundle identity, integrity hash, and artifact ConfigMap (CEL-validated at admission) | | `spec.model` | object | No | — | Model metadata for LLM-backed offers | | `spec.upstream` | object | Yes | — | In-cluster Service that handles the workload | | `spec.payment` | object | Yes | — | x402-aligned payment terms | @@ -70,6 +71,66 @@ spec: | `spec.provenance` | object | No | — | Optional experiment or training provenance metadata | | `spec.registration` | object | No | — | ERC-8004 publication metadata | +### `spec.skill` + +Populated when `spec.type == "skill"` — sells a downloadable skill bundle +(gzipped tar of a `SKILL.md` + scripts directory). 
The controller verifies +that the ConfigMap bytes hash to `sha256` before rendering the bundle server +(`so--bundle`: busybox httpd, port 8080, serving `/bundle.tar.gz` and +`/skill.json`), and the x402-verifier surfaces name/version/sha256 in the 402 +response's `extra.skill` block for pre-purchase verification. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `spec.skill.name` | string | Yes | Skill name, `^[a-z0-9][a-z0-9-]*$`, max 64. With `version` it forms the skill ref `@` used by ERC-8004 skill tags | +| `spec.skill.version` | string | Yes | Skill version, `^[A-Za-z0-9][A-Za-z0-9._-]*$`, max 64 | +| `spec.skill.sha256` | string | Yes | Lowercase hex SHA-256 of the gzipped bundle bytes, `^[a-f0-9]{64}$` | +| `spec.skill.bundleConfigMap` | string | Yes | Name of a ConfigMap in the **offer's namespace** whose `binaryData["bundle.tar.gz"]` is the artifact (compressed size <= 900000 bytes) | +| `spec.skill.displayName` | string | No | Human-friendly display name, max 128 | +| `spec.skill.description` | string | No | Short description for catalog surfaces, max 1024 | + +Constraints enforced by the controller for `type=skill`: + +- `spec.upstream` MUST be `{service: so--bundle, namespace: + , port: 8080}` — anything else is rejected with + `UpstreamHealthy=False reason=InvalidSkillUpstream` (a skill offer may only + advertise its own controller-rendered bundle server). +- Bundle gate reasons on `UpstreamHealthy=False`: `BundleMissing`, + `BundleTooLarge` (compressed bytes > 900000), `BundleHashMismatch`. +- A spec-level CEL rule rejects `type=skill` offers without `spec.skill` at + admission time. 
+ +Skill example: + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceOffer +metadata: + name: my-skill + namespace: hermes-obol-agent +spec: + type: skill + skill: + name: my-skill + version: "0.1.0" + sha256: "<64-char lowercase hex of the gzipped bundle bytes>" + bundleConfigMap: my-skill-skill-bundle + displayName: "My Skill" + description: "What the skill does." + upstream: + service: so-my-skill-bundle + namespace: hermes-obol-agent + port: 8080 + healthPath: /skill.json + payment: + scheme: exact + network: base-sepolia + payTo: "0xYourWalletAddress" + maxTimeoutSeconds: 300 + price: + perRequest: "0.25" +``` + ### `spec.model` | Field | Type | Required | Description | diff --git a/internal/embed/skills/smoke-test/SKILL.md b/internal/embed/skills/smoke-test/SKILL.md new file mode 100644 index 00000000..be219cc2 --- /dev/null +++ b/internal/embed/skills/smoke-test/SKILL.md @@ -0,0 +1,184 @@ +--- +name: smoke-test +description: "Sellable read-only smoke test of an Obol Stack public surface. The buyer pays per run (x402); the agent GET-probes the target's discovery + payment-gating endpoints, writes a scored report, commits it to the seller-owned public GitHub repo, and hands back the exact `obol smoke calldata` command the OPERATOR runs to derive the ERC-8004 validationResponse calldata. The agent never pays, never signs, never submits chain transactions." +metadata: { "openclaw": { "emoji": "🔍", "requires": { "bins": ["python3"] } } } +--- + +# Smoke Test + +Probe a TARGET Obol Stack public surface **read-only**, score it, publish the +report, and emit the verdict-grounding command. You are the seller side of a +paid smoke-test service: a buyer paid (via x402) for one run against one target. + +Hard rules — these are the product's trust model, never break them: + +- **GET only.** Never send an `X-PAYMENT` header, never sign anything, never + settle anything, never submit a chain transaction. 
The OPERATOR submits the + on-chain validationResponse from their own wallet — identical to the bounty + stance (the agent/controller never signs validation txs). +- **Never probe cross-host.** The scripts reduce catalog endpoints to their + path and re-join onto the target base URL. Don't hand-probe URLs from the + target's responses. +- **Never echo `GITHUB_TOKEN`** (or any `Authorization` header), never pass it + on a command line. The scripts read it from env only and redact it from + errors. To check it's configured, test presence only: + `[ -n "$GITHUB_TOKEN" ] && [ -n "$GITHUB_REPORT_REPO" ] && echo configured || echo missing` +- **At most 2 GitHub writes per run** (report.md + best-effort latest.md); + `results.json` is never committed. + +## Inputs (from the buyer message) + +The buyer message looks like `smoke-test `, optionally with a +run id. + +- **target** (required): an absolute http(s) base URL, e.g. + `https://` or `http://obol.stack:8080`. If the buyer gave a + bare host, prepend the scheme (`https://` for public hostnames, `http://` + for local stack addresses) BEFORE running the script — the normalized + target (whitespace-stripped, trailing `/` stripped) is hashed into the + on-chain requestHash, so it must be unambiguous. +- **run id** (optional): must match `^[A-Za-z0-9._-]+$`. When absent the + script generates `-<6 hex>`. + +## Run procedure — TWO separate terminal calls + +Terminal calls on CRD agents time out at 80s. The probe alone can take up to +~60s (up to 8 checks × 8s). **Never combine probe and post in one call.** + +**Call 1 — probe (no network writes):** + +```bash +python3 ${OBOL_SKILLS_DIR:-/data/.hermes/obol-skills}/smoke-test/scripts/smoke.py probe [--run-id ] +``` + +Prints `results.json` to stdout and writes +`./smoke///{report.md,results.json}` in the workspace +(`` = lowercase hostname with `:` → `-`, e.g. +`obol.stack:8080` → `obol.stack-8080`). Exit 0 even when checks fail — the +score IS the verdict.
Non-zero only on operational errors. + +**Call 2 — post (only when BOTH `GITHUB_TOKEN` and `GITHUB_REPORT_REPO` are set):** + +```bash +python3 ${OBOL_SKILLS_DIR:-/data/.hermes/obol-skills}/smoke-test/scripts/gh_post.py ./smoke/<target-host>/<run-id>/ +``` + +Commits `report.md` to `reports/<target-host>/<run-id>.md` in the seller repo, +updates the local `results.json` with the commit-pinned permalink, best-effort +updates `reports/<target-host>/latest.md`. The only stdout payload lines are: + +``` +permalink: https://github.com/<owner>/<repo>/blob/<commit-sha>/reports/<target-host>/<run-id>.md +content-sha: <blob-sha> +``` + +If the GitHub env is absent, **degrade gracefully**: skip Call 2, tell the +buyer the report is local-only (no permalink), and still return the full +results + calldata command (without `--response-uri`). + +If Call 2 fails (non-zero exit), the report stays local and `post` is +re-runnable: `python3 .../scripts/smoke.py post ./smoke/<target-host>/<run-id>/` +(prints the updated results.json). + +## What gets probed + +All checks are GET-only, 8s timeout, 1 MiB body cap, no redirects, User-Agent +`obol-smoke-test/1.0 (+https://github.com/ObolNetwork/obol-stack)`: + +1. `skill-md` — `<target>/skill.md` → 200 + non-empty body (counted) +2. `services-json` — `<target>/api/services.json` → 200 + bare JSON **list** + of objects with non-empty string `name` and `endpoint` (counted; an empty + catalog passes) +3. `x402-402:<name>` — per advertised service (first 5, sorted by name), + GET the endpoint's **path** on the target → 402 with a valid x402 body: + `x402Version` present, non-empty `accepts`, each entry with non-empty + `scheme`/`network`, `0x` + 40-hex `payTo`/`asset`, and a positive digits-only + `maxAmountRequired` or `amount` (one counted check per service) +4. 
`agent-registration` — `/.well-known/agent-registration.json` → + 200 + JSON object (**informational** — excluded from passed/total/score) + +Scoring over counted checks only: `score100 = floor(100*passed/total)` (the +on-chain value — the deployed registry rejects responses above 100) and +`score255 = floor(255*passed/total)` (off-chain field kept in results.json). + +## Reply to the buyer + +After the run, reply with — in this order: + +1. The check table (from `report.md`): check name, ok, latency, detail. +2. The score line: `/ checks passed — score /100` + (mention `score255` from results.json as the off-chain value). +3. The GitHub permalink (when posted) and the `reportSha256` from + results.json (sha256 of the exact committed `report.md` bytes). +4. The full `results.json` content. +5. The EXACT command the operator runs to derive the ERC-8004 + validationResponse calldata (fill in the real values; the agent itself + NEVER runs this and never submits the transaction): + +```bash +obol smoke calldata \ + --target "" \ + --run-id "" \ + --response \ + --response-uri "" \ + --response-hash 0x \ + --network base-sepolia +``` + +Notes for that command: + +- It derives `requestHash = keccak256("obol/smoke-test/v1||")` — keccak256 is computed by the CLI, not in-pod + (there is no reliable in-pod keccak; `hashlib.sha3_256` is NIST SHA-3, NOT + keccak256). That is why `requestHash` is deliberately absent from + results.json. +- `--response` is **score100** (0–100), not score255. +- `--response-hash` is `0x` + the 64-hex `reportSha256` (sha256 of the + committed report.md bytes). Omit `--response-uri`/`--response-hash` when the + GitHub post didn't run (a zero response hash is allowed). +- The CLI prints the ValidationRegistry address + calldata; the operator + submits with THEIR wallet. 
+ +## Seller/operator setup (one-time, host side — not the agent) + +GitHub credentials ride the existing `hermes-env` Secret (already whitelisted +by the admission policy and RBAC — do NOT invent a new Secret name): + +```bash +obol kubectl -n agent- create secret generic hermes-env \ + --from-literal=GITHUB_TOKEN= \ + --from-literal=GITHUB_REPORT_REPO=/ \ + --dry-run=client -o yaml | obol kubectl apply -f - +obol kubectl -n agent- rollout restart deploy/hermes +``` + +**Token scope is the blast radius.** The buyer drives a prompt-injectable +agent that holds this token in env, so it MUST be a fine-grained PAT scoped to +ONLY the one public report repo, with `contents: read+write` and nothing +else. Accepted v0 worst case: an attacker writes junk to that one public +repo. Never use a classic PAT or broader scopes. The token lives only in +Secret data — never in the Agent CR spec/annotations/status. + +Sell the agent: + +```bash +obol agent new --skills smoke-test --objective "Paid read-only smoke tests of Obol Stack public surfaces" +obol sell agent --per-request --chain --pay-to 0x +``` + +Buyers reach it via `buy.py pay-agent --model --message "smoke-test "` +(streaming). v0: no buyer token handoff — reports always land in the +seller-owned repo. + +## Artifacts + +``` +./smoke/// +├── report.md # canonical committed bytes; sha256 = reportSha256 +└── results.json # version obol/smoke-test/v1; stays local + in chat reply +``` + +results.json fields: `version`, `target` (normalized), `runId`, `timestamp`, +`checks[]` (`name`, `ok`, `detail`, `ms`, optional `informational`), +`passed`, `total`, `score255`, `score100`, `reportSha256` (64 hex, no 0x), +`permalink` (empty until post succeeds). 
diff --git a/internal/embed/skills/smoke-test/scripts/gh_post.py b/internal/embed/skills/smoke-test/scripts/gh_post.py new file mode 100644 index 00000000..54438e93 --- /dev/null +++ b/internal/embed/skills/smoke-test/scripts/gh_post.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +"""gh_post.py — commit a smoke report.md to the seller-owned public GitHub repo. + +Posting contract: + - Base https://api.github.com (override with GITHUB_API_BASE for tests only). + - Repo from env GITHUB_REPORT_REPO, validated against + ^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$. + - Token from env GITHUB_TOKEN ONLY. By construction this script never + prints headers or env values, never puts the token in argv, and redacts + the token from every error string before it can reach stderr. + - Headers on every call: Authorization Bearer, Accept + application/vnd.github+json, X-GitHub-Api-Version 2022-11-28, and the + obol-smoke-test User-Agent. + - Redirects are NEVER followed (the default opener would replay the + Authorization header to the redirect target, even cross-host); a 3xx + from the API is surfaced as the final status and is a hard failure. + - Path: reports//.md (target-host = lowercase hostname + with ":" rewritten to "-"). + - Create-or-update: GET contents for the existing blob sha (a read, not a + write; other-than-200/404 retried once then abort), then PUT. On 409 + re-GET the sha once and retry the PUT once. On 403/429 honor Retry-After + (fallback: x-ratelimit-reset delta), sleep min(value, 30s), max 2 + retries. On 5xx/connection errors exponential 2s/4s, max 2 retries. + Total post budget 25s; on exhaustion exit non-zero with a re-run hint. + - Permalink = https://github.com/{o}/{r}/blob/{PUT .commit.sha}/{path} + (commit-pinned, NOT the branch-floating .content.html_url). + - Write #2 (best-effort, failure never fails the run): + reports//latest.md with only runId, score line, permalink. + Exactly <= 2 writes per run; results.json is NEVER committed. + +Usage: + GITHUB_TOKEN=... 
GITHUB_REPORT_REPO=owner/repo \ + python3 gh_post.py + +stdout payload is exactly two lines (everything else goes to stderr): + permalink: + content-sha: + +Exit codes: 0 on success (even if the best-effort latest.md write failed), +non-zero when the report could not be committed (report stays local; `post` +is re-runnable). + +Stdlib only: argparse/base64/json/os/re/sys/time/urllib. +""" + +import argparse +import base64 +import json +import os +import re +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +# Shared normalization with the probe — same skill scripts/ dir. +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from smoke import USER_AGENT, host_slug # noqa: E402 + +API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com").rstrip("/") +REPO_RE = re.compile(r"^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$") +POST_BUDGET_SECONDS = 25 +MAX_SLEEP_SECONDS = 30 + + +class PostError(Exception): + """Operational posting failure. Messages are pre-redacted.""" + + +class _NoRedirect(urllib.request.HTTPRedirectHandler): + """Never follow redirects: the default handler re-sends every request + header — including Authorization — to the redirect target, even + cross-host, which would leak the token (mirrors smoke.py's _NoRedirect; + here the request carries the only credential). 
A 3xx comes back as the + final status and the retry ladder treats it as a hard failure.""" + + def redirect_request(self, req, fp, code, msg, headers, newurl): + return None + + +_OPENER = urllib.request.build_opener(_NoRedirect) + + +def _log(msg): + print(msg, file=sys.stderr) + + +def _redact(text, token): + text = str(text) + return text.replace(token, "[REDACTED]") if token else text + + +def _remaining(deadline): + return deadline - time.monotonic() + + +def _check_deadline(deadline): + if _remaining(deadline) <= 0: + raise PostError( + "post budget (%ds) exhausted; report remains local — re-run `post `" + % POST_BUDGET_SECONDS + ) + + +def _sleep_within(seconds, deadline): + seconds = min(seconds, MAX_SLEEP_SECONDS) + if seconds >= _remaining(deadline): + raise PostError( + "post budget (%ds) exhausted while backing off; report remains local — " + "re-run `post `" % POST_BUDGET_SECONDS + ) + time.sleep(seconds) + + +def _retry_after_seconds(headers): + """Retry-After seconds, falling back to the x-ratelimit-reset delta.""" + raw = headers.get("Retry-After") or headers.get("retry-after") + if raw: + try: + return max(1, int(float(raw))) + except ValueError: + pass + reset = headers.get("x-ratelimit-reset") or headers.get("X-RateLimit-Reset") + if reset: + try: + return max(1, int(float(reset)) - int(time.time())) + except ValueError: + pass + return 2 + + +def _gh_request(method, url, token, payload=None, deadline=None): + """One GitHub API call. Returns (status, headers_dict, body_bytes). + status == 0 means no HTTP response (connection-level failure); the body + then carries a redacted reason. 
Never raises, never logs headers.""" + headers = { + "Authorization": "Bearer " + token, + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": USER_AGENT, + } + data = None + if payload is not None: + data = json.dumps(payload).encode("utf-8") + headers["Content-Type"] = "application/json" + timeout = 10.0 + if deadline is not None: + timeout = max(1.0, min(10.0, _remaining(deadline))) + req = urllib.request.Request(url, data=data, method=method, headers=headers) + try: + with _OPENER.open(req, timeout=timeout) as resp: + return resp.getcode(), dict(resp.headers), resp.read() + except urllib.error.HTTPError as exc: + try: + body = exc.read() + except Exception: + body = b"" + return exc.code, dict(exc.headers or {}), body + except Exception as exc: # URLError, timeout, ConnectionError, ... + return 0, {}, _redact(exc, token).encode("utf-8") + + +def _body_snippet(body, token): + return _redact(body.decode("utf-8", "replace")[:200], token) + + +def _contents_url(owner_repo, path): + return "%s/repos/%s/contents/%s" % (API_BASE, owner_repo, urllib.parse.quote(path, safe="/")) + + +def _get_existing_sha(owner_repo, path, token, deadline): + """Existing blob sha for create-or-update. 200 -> sha, 404 -> None, + anything else retried once then abort.""" + url = _contents_url(owner_repo, path) + for attempt in (1, 2): + _check_deadline(deadline) + status, _, body = _gh_request("GET", url, token, deadline=deadline) + if status == 200: + try: + return json.loads(body).get("sha") or None + except ValueError: + return None + if status == 404: + return None + if attempt == 1: + continue + raise PostError( + "GET contents %s failed (status %s): %s" % (path, status, _body_snippet(body, token)) + ) + + +def _put_file(owner_repo, path, message, content_bytes, sha, token, deadline): + """PUT one file via the contents API with the contract's retry ladder. 
+ Returns the parsed PUT response JSON.""" + url = _contents_url(owner_repo, path) + body = {"message": message, "content": base64.b64encode(content_bytes).decode("ascii")} + if sha: + body["sha"] = sha + rate_retries = 0 + server_retries = 0 + conflict_retried = False + while True: + _check_deadline(deadline) + status, headers, raw = _gh_request("PUT", url, token, payload=body, deadline=deadline) + if status in (200, 201): + try: + return json.loads(raw) + except ValueError: + raise PostError("PUT %s returned %d but unparseable JSON" % (path, status)) + if status == 409 and not conflict_retried: + conflict_retried = True + new_sha = _get_existing_sha(owner_repo, path, token, deadline) + if new_sha: + body["sha"] = new_sha + else: + body.pop("sha", None) + continue + if status in (403, 429) and rate_retries < 2: + rate_retries += 1 + _sleep_within(_retry_after_seconds(headers), deadline) + continue + if (status >= 500 or status == 0) and server_retries < 2: + server_retries += 1 + _sleep_within(2 ** server_retries, deadline) # 2s, then 4s + continue + raise PostError( + "PUT %s failed (status %s): %s" % (path, status, _body_snippet(raw, token)) + ) + + +def post_run(run_dir): + """Commit /report.md per the contract, update results.json with + the commit-pinned permalink, best-effort update latest.md. + Returns (results_dict, permalink, content_sha). 
Raises PostError on + operational failure (report stays local; re-runnable).""" + token = os.environ.get("GITHUB_TOKEN", "").strip() + owner_repo = os.environ.get("GITHUB_REPORT_REPO", "").strip() + if not token: + raise PostError("GITHUB_TOKEN is not set (provision it via the hermes-env Secret)") + if not REPO_RE.match(owner_repo): + raise PostError( + "GITHUB_REPORT_REPO=%r is not / " + "(^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$)" % owner_repo + ) + + results_path = os.path.join(run_dir, "results.json") + report_path = os.path.join(run_dir, "report.md") + try: + with open(results_path, "r", encoding="utf-8") as fh: + results = json.load(fh) + with open(report_path, "rb") as fh: + report_bytes = fh.read() + except (OSError, ValueError) as exc: + raise PostError("cannot load run dir %s: %s" % (run_dir, _redact(exc, token))) + + run_id = str(results.get("runId", "")).strip() + target = str(results.get("target", "")).strip() + if not run_id or not target: + raise PostError("results.json missing runId/target — re-run the probe") + passed = int(results.get("passed", 0)) + total = int(results.get("total", 0)) + score100 = int(results.get("score100", 0)) + + target_host = host_slug(target) + report_repo_path = "reports/%s/%s.md" % (target_host, run_id) + deadline = time.monotonic() + POST_BUDGET_SECONDS + + # Write #1 — the report itself (create-or-update). 
+ _log("posting %s to %s:%s" % (report_path, owner_repo, report_repo_path)) + sha = _get_existing_sha(owner_repo, report_repo_path, token, deadline) + put = _put_file( + owner_repo, + report_repo_path, + "smoke: %s %s %d/%d" % (target_host, run_id, passed, total), + report_bytes, + sha, + token, + deadline, + ) + try: + commit_sha = put["commit"]["sha"] + content_sha = put["content"]["sha"] + except (KeyError, TypeError): + raise PostError("PUT response missing commit/content sha") + + permalink = "https://github.com/%s/blob/%s/%s" % (owner_repo, commit_sha, report_repo_path) + results["permalink"] = permalink + try: + with open(results_path, "w", encoding="utf-8") as fh: + fh.write(json.dumps(results, indent=2) + "\n") + except OSError as exc: + _log("warning: could not update results.json: %s" % _redact(exc, token)) + + # Write #2 — best-effort latest.md pointer; failure does NOT fail the run. + latest_repo_path = "reports/%s/latest.md" % target_host + latest_bytes = ( + "Run ID: %s\nResult: %d/%d checks passed — score %d/100\nReport: %s\n" + % (run_id, passed, total, score100, permalink) + ).encode("utf-8") + try: + latest_sha = _get_existing_sha(owner_repo, latest_repo_path, token, deadline) + _put_file( + owner_repo, + latest_repo_path, + "smoke: %s latest %s" % (target_host, run_id), + latest_bytes, + latest_sha, + token, + deadline, + ) + except PostError as exc: + _log("warning: latest.md update skipped: %s" % exc) + + return results, permalink, content_sha + + +def main(argv=None): + parser = argparse.ArgumentParser( + prog="gh_post.py", + description="Commit a smoke run's report.md to the seller-owned public report repo.", + ) + parser.add_argument("run_dir", help="run dir written by smoke.py probe, e.g. 
./smoke//") + args = parser.parse_args(argv) + try: + _, permalink, content_sha = post_run(args.run_dir) + except PostError as exc: + _log("error: %s" % exc) + _log("report remains local; re-run: python3 gh_post.py %s" % args.run_dir) + return 1 + # The ONLY stdout payload lines: + print("permalink: %s" % permalink) + print("content-sha: %s" % content_sha) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/embed/skills/smoke-test/scripts/smoke.py b/internal/embed/skills/smoke-test/scripts/smoke.py new file mode 100644 index 00000000..c476db68 --- /dev/null +++ b/internal/embed/skills/smoke-test/scripts/smoke.py @@ -0,0 +1,523 @@ +#!/usr/bin/env python3 +"""smoke.py — read-only smoke probe of an Obol Stack public surface. + +Probes a TARGET base URL's public discovery + payment-gating surface with +plain GETs, writes a markdown report + machine-readable results, and (via +`post` / gh_post.py) commits the report to a seller-owned public GitHub repo. + +Safety contract (non-negotiable): + - GET only. NEVER sends an X-PAYMENT header, never signs anything, never + settles anything, never submits chain transactions. + - Never probes a cross-host URL: catalog endpoints are reduced to their + PATH and re-joined onto the target base URL. + - Response bodies capped at 1 MiB; per-check timeout 8s; one retry on + connection-level errors only (refused/reset — fast failures), never on + timeouts or HTTP errors. + - Redirects are not followed (a 3xx counts as the final status). + +Checks (counted unless marked informational): + 1. skill-md GET /skill.md -> 200 + non-empty body + 2. services-json GET /api/services.json -> 200 + bare JSON LIST of + objects with non-empty string `name` and `endpoint` + 3. 
x402-402: per advertised service (first 5 sorted by name): + GET -> 402 + valid x402 body + (x402Version present; accepts non-empty; each entry has + scheme/network non-empty, payTo/asset 0x40-hex, and a + positive digits-only maxAmountRequired OR amount) + 4. agent-registration GET /.well-known/agent-registration.json + -> 200 + JSON object (INFORMATIONAL — excluded from score) + +Scoring: passed/total over counted checks only (total >= 2). + score255 = floor(255*passed/total) (off-chain, task-spec field) + score100 = floor(100*passed/total) (THE on-chain value — registry caps at 100) + +Usage: + python3 smoke.py probe [--run-id ] [--out-dir ] + python3 smoke.py post + python3 smoke.py run [--run-id ] [--out-dir ] + +`probe` performs NO network writes; it writes report.md + results.json under +/// (default ./smoke/...) and prints results.json +to stdout. `post` commits an existing report to GitHub (env GITHUB_TOKEN + +GITHUB_REPORT_REPO required) and prints the updated results.json. `run` is +probe+post one-shot for host/manual use; it degrades to probe-only when the +GitHub env is absent. + +Exit codes: 0 even when checks fail (the score carries the verdict); +non-zero only on operational errors (bad args, unwritable workspace, +GitHub post failure). + +Stdlib only: argparse/hashlib/json/re/secrets/socket/time/urllib. +""" + +import argparse +import hashlib +import json +import os +import re +import secrets +import socket +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +VERSION = "obol/smoke-test/v1" + +# Same Cloudflare-WAF-safe UA convention as buy-x402's buy.py. 
USER_AGENT = "obol-smoke-test/1.0 (+https://github.com/ObolNetwork/obol-stack)"

PER_CHECK_TIMEOUT = 8  # seconds, per attempt
MAX_BODY_BYTES = 1024 * 1024  # 1 MiB body cap on every response
MAX_SERVICES = 5  # probe at most the first 5 services sorted by name

# Anchored with \Z rather than $: `$` also matches just before a trailing
# newline, which would let "0x...\n" through as a valid address.
ADDR_RE = re.compile(r"^0x[0-9a-fA-F]{40}\Z")
DIGITS_RE = re.compile(r"^[0-9]+\Z")
RUN_ID_RE = re.compile(r"^[A-Za-z0-9._-]+\Z")
MAX_DETAIL_LEN = 200

# Slug used when the target has no usable hostname (empty or dots-only).
FALLBACK_HOST_SLUG = "unknown-host"


def log(msg):
    """Diagnostics go to stderr; stdout is reserved for results.json."""
    print(msg, file=sys.stderr)


# ---------------------------------------------------------------------------
# Normalization (MUST stay in lockstep with Go: erc8004 normalizeSmokeTarget)
# ---------------------------------------------------------------------------

def normalize_target(url):
    """Return `url` with surrounding whitespace and all trailing slashes removed.

    The normalized form is what `obol smoke calldata` hashes into the
    ERC-8004 requestHash preimage, so this must match the Go-side
    normalization exactly. This script never computes keccak256 itself
    (hashlib.sha3_256 is NIST SHA-3, NOT keccak256).
    """
    return url.strip().rstrip("/")


def host_slug(target):
    """Return a filesystem/URL-path-safe slug for the target's host.

    The netloc is lowercased, any `user:pass@` prefix is dropped, and every
    character outside `[a-z0-9._-]` becomes "-", e.g.
    obol.stack:8080 -> obol.stack-8080. Used for the local run dir AND the
    GitHub report path (gh_post.py imports this — keep behavior stable).

    A slug that is empty or consists only of dots (targets such as
    "https://" or "http://..") would collapse or escape the per-host
    directory when joined into a path, so it is replaced with
    FALLBACK_HOST_SLUG. An unparseable netloc (urlsplit raising ValueError)
    is treated the same way.
    """
    try:
        netloc = urllib.parse.urlsplit(target).netloc
    except ValueError:
        netloc = ""
    host = netloc.rsplit("@", 1)[-1].lower()
    slug = re.sub(r"[^a-z0-9._-]", "-", host)
    if not slug.strip("."):
        return FALLBACK_HOST_SLUG
    return slug


def default_run_id():
    """Return `<UTC timestamp as YYYYMMDDTHHMMSSZ>-<6 lowercase hex>`."""
    return time.strftime("%Y%m%dT%H%M%SZ", time.gmtime()) + "-" + secrets.token_hex(3)


# ---------------------------------------------------------------------------
# HTTP (GET only — by construction this module cannot send a payment)
# ---------------------------------------------------------------------------

class _NoRedirect(urllib.request.HTTPRedirectHandler):
    """Never follow redirects: a redirect could send the probe cross-host.
    A 3xx is returned as the final status of the check."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Returning None tells urllib not to build a follow-up request.
        return None


_OPENER = urllib.request.build_opener(_NoRedirect)


def _fetch(url):
    """GET `url`. Returns (status, body_bytes, error_str). Never raises.

    status == 0 with a non-empty error_str means no HTTP response at all.
    The body is read up to MAX_BODY_BYTES. One retry (after a 1s pause) on
    connection-level errors only (ConnectionError — refused / reset fail
    fast); timeouts and DNS failures are NOT retried so the worst-case probe
    budget stays bounded under the agent's 80s terminal timeout.

    Malformed requests or responses (http.client.HTTPException such as
    InvalidURL for a path containing whitespace, or ValueError /
    UnicodeEncodeError from request encoding) are reported as a failed fetch
    rather than propagating, so one bad catalog entry cannot abort the run.
    """
    # Local import: keeps this block self-contained; urllib.request already
    # loads http.client, so this adds no import cost.
    import http.client

    try:
        req = urllib.request.Request(
            url,
            method="GET",
            headers={"User-Agent": USER_AGENT, "Accept": "*/*"},
        )
    except ValueError as exc:
        return 0, b"", "invalid URL: %s" % exc

    attempt = 0
    while True:
        attempt += 1
        try:
            with _OPENER.open(req, timeout=PER_CHECK_TIMEOUT) as resp:
                return resp.getcode(), resp.read(MAX_BODY_BYTES), ""
        except urllib.error.HTTPError as exc:
            # Non-2xx responses (including un-followed 3xx) are real HTTP
            # answers: hand back the status and whatever body is readable.
            try:
                body = exc.read(MAX_BODY_BYTES)
            except Exception:
                body = b""
            return exc.code, body, ""
        except urllib.error.URLError as exc:
            reason = getattr(exc, "reason", exc)
            if isinstance(reason, ConnectionError) and attempt == 1:
                time.sleep(1)
                continue
            if isinstance(reason, (socket.timeout, TimeoutError)):
                return 0, b"", "timeout after %ds" % PER_CHECK_TIMEOUT
            return 0, b"", "connection failed: %s" % reason
        except (socket.timeout, TimeoutError):
            return 0, b"", "timeout after %ds" % PER_CHECK_TIMEOUT
        except ConnectionError as exc:
            if attempt == 1:
                time.sleep(1)
                continue
            return 0, b"", "connection failed: %s" % exc
        except OSError as exc:
            return 0, b"", "network error: %s" % exc
        except (http.client.HTTPException, ValueError) as exc:
            # Kept last so RemoteDisconnected (also a ConnectionError) still
            # takes the retry path above.
            return 0, b"", "invalid request or response: %s" % exc


def _clip(detail):
    """Stringify `detail` and truncate it to MAX_DETAIL_LEN characters,
    ending with an ellipsis when truncated."""
    detail = str(detail)
    if len(detail) > MAX_DETAIL_LEN:
        detail = detail[: MAX_DETAIL_LEN - 1] + "…"
    return detail


def _check(name, ok, detail, ms, informational=False):
    """Build one check record for results.json. The `informational` key is
    only present (as True) for checks excluded from the score."""
    entry = {"name": name, "ok": bool(ok), "detail": _clip(detail), "ms": int(ms)}
    if informational:
        entry["informational"] = True
    return entry


def _timed(name, fn, informational=False):
    """Run `fn()` -> (ok, detail, extra) and return (check_record, extra).

    ms = wall-clock per check (includes the single connection-error retry).
    """
    t0 = time.monotonic()
    ok, detail, extra = fn()
    ms = round((time.monotonic() - t0) * 1000)
    return _check(name, ok, detail, ms, informational=informational), extra
# ---------------------------------------------------------------------------
# Checks
# ---------------------------------------------------------------------------

def check_skill_md(target):
    """Counted check: GET <target>/skill.md must return 200 with a body that
    is non-empty after whitespace stripping. Returns the check record."""

    def run():
        status, body, err = _fetch(target + "/skill.md")
        if err:
            return False, err, None
        if status != 200:
            return False, "expected 200, got %d" % status, None
        if not body.decode("utf-8", "replace").strip():
            return False, "200 but body empty after strip", None
        return True, "200, %d bytes" % len(body), None

    return _timed("skill-md", run)[0]


def check_services_json(target):
    """Counted check of <target>/api/services.json.

    Returns (check, services). services is the validated advertised list
    (possibly empty) when the check passed, else []."""

    def run():
        status, body, err = _fetch(target + "/api/services.json")
        if err:
            return False, err, []
        if status != 200:
            return False, "expected 200, got %d" % status, []
        try:
            parsed = json.loads(body.decode("utf-8", "replace"))
        except (ValueError, RecursionError) as exc:
            # RecursionError: a deeply nested body is hostile input, not an
            # operational failure — fail the check instead of crashing.
            return False, "invalid JSON: %s" % exc, []
        # The catalog is a BARE JSON array of entries — not {"services": [...]}.
        if not isinstance(parsed, list):
            return False, "top-level JSON is not a list", []
        for i, entry in enumerate(parsed):
            if not isinstance(entry, dict):
                return False, "entry %d is not an object" % i, []
            name = entry.get("name")
            endpoint = entry.get("endpoint")
            if not isinstance(name, str) or not name.strip():
                return False, "entry %d missing non-empty string `name`" % i, []
            if not isinstance(endpoint, str) or not endpoint.strip():
                return False, "entry %d (%s) missing non-empty string `endpoint`" % (i, name), []
        return True, "200, %d service(s) advertised" % len(parsed), parsed

    return _timed("services-json", run)


def _validate_accepts_entry(entry, idx):
    """Returns failure reason or '' for one entry of the 402 `accepts` list.

    Requires non-empty string scheme/network, payTo/asset matching ADDR_RE,
    and a positive digits-only amount. The amount is taken from
    maxAmountRequired, falling back to amount when that is missing or blank
    (the v1/v2 dual lookup)."""
    if not isinstance(entry, dict):
        return "accepts[%d] is not an object" % idx
    for field in ("scheme", "network"):
        value = entry.get(field)
        if not isinstance(value, str) or not value.strip():
            return "accepts[%d].%s missing or empty" % (idx, field)
    for field in ("payTo", "asset"):
        value = entry.get(field)
        if not isinstance(value, str) or not ADDR_RE.match(value):
            return "accepts[%d].%s is not a 0x..40-hex address" % (idx, field)
    raw = entry.get("maxAmountRequired")
    if raw is None or not str(raw).strip():
        raw = entry.get("amount")
    amount = str(raw if raw is not None else "").strip()
    if not DIGITS_RE.match(amount) or int(amount) <= 0:
        return "accepts[%d] has no positive digits-only maxAmountRequired/amount" % idx
    return ""


def check_service_402(target, service):
    """One counted check per advertised service. Probes ONLY the path of the
    catalog endpoint joined onto the target base URL — never a cross-host URL
    the catalog hands us.

    Passes when the response is a 402 whose JSON object body has an
    `x402Version` key and a non-empty `accepts` list in which every entry
    satisfies _validate_accepts_entry. An endpoint that cannot be parsed as
    a URL fails this check rather than aborting the probe."""
    name = service["name"].strip()

    def run():
        try:
            path = urllib.parse.urlsplit(service["endpoint"].strip()).path
        except ValueError as exc:
            # e.g. an unbalanced "[" in the netloc; the endpoint comes from
            # the target's own catalog and is untrusted.
            return False, "endpoint is not a parseable URL: %s" % exc, None
        if not path.startswith("/"):
            path = "/" + path
        status, body, err = _fetch(target + path)
        if err:
            return False, err, None
        if status != 402:
            return False, "expected 402, got %d" % status, None
        try:
            parsed = json.loads(body.decode("utf-8", "replace"))
        except (ValueError, RecursionError) as exc:
            return False, "402 body is not JSON: %s" % exc, None
        if not isinstance(parsed, dict):
            return False, "402 body is not a JSON object", None
        if "x402Version" not in parsed:
            return False, "402 body missing x402Version", None
        accepts = parsed.get("accepts")
        if not isinstance(accepts, list) or not accepts:
            return False, "402 body has no non-empty accepts list", None
        for i, entry in enumerate(accepts):
            reason = _validate_accepts_entry(entry, i)
            if reason:
                return False, reason, None
        return True, "402, %d payment option(s)" % len(accepts), None

    return _timed("x402-402:" + name, run)[0]


def check_agent_registration(target):
    """INFORMATIONAL — recorded but excluded from passed/total/score.

    GET <target>/.well-known/agent-registration.json must return 200 with a
    JSON object body."""

    def run():
        status, body, err = _fetch(target + "/.well-known/agent-registration.json")
        if err:
            return False, err, None
        if status != 200:
            return False, "expected 200, got %d" % status, None
        try:
            parsed = json.loads(body.decode("utf-8", "replace"))
        except (ValueError, RecursionError) as exc:
            return False, "invalid JSON: %s" % exc, None
        if not isinstance(parsed, dict):
            return False, "200 but body is not a JSON object", None
        return True, "200, JSON object", None

    return _timed("agent-registration", run, informational=True)[0]


# ---------------------------------------------------------------------------
# Report rendering
# ---------------------------------------------------------------------------

def _md_cell(text):
    """Make `text` safe inside a markdown table cell: escape pipes and
    flatten CR/LF to spaces."""
    return str(text).replace("|", "\\|").replace("\n", " ").replace("\r", " ")


def build_report(results, probed_count, advertised_count):
    """Render the markdown report for `results`.

    Emits a header (target, run id, timestamp, score line), one table row
    per check (informational checks get an " (info)" suffix), and — only
    when fewer services were probed than advertised — a trailing
    "Probed X of Y advertised services" line. The returned string always
    ends with a newline."""
    lines = [
        "# Obol Stack Smoke Report",
        "",
        "- Target: %s" % results["target"],
        "- Run ID: %s" % results["runId"],
        "- Timestamp: %s" % results["timestamp"],
        "- Result: %d/%d checks passed — score %d/100"
        % (results["passed"], results["total"], results["score100"]),
        "",
        "| Check | OK | Latency (ms) | Detail |",
        "|---|---|---|---|",
    ]
    for check in results["checks"]:
        name = check["name"]
        if check.get("informational"):
            name += " (info)"
        lines.append(
            "| %s | %s | %d | %s |"
            % (_md_cell(name), "yes" if check["ok"] else "no", check["ms"], _md_cell(check["detail"]))
        )
    if advertised_count > probed_count:
        lines.append("")
        lines.append("Probed %d of %d advertised services" % (probed_count, advertised_count))
    return "\n".join(lines) + "\n"
--------------------------------------------------------------------------- + +def run_probe(target_raw, run_id, out_base): + target = normalize_target(target_raw) + if not target.startswith(("http://", "https://")): + raise SystemExit( + "error: target must be an absolute http(s) URL (got %r) — the " + "normalized target is hashed into the on-chain requestHash, so " + "always pass the scheme explicitly" % target_raw + ) + if run_id is None or not str(run_id).strip(): + run_id = default_run_id() + run_id = str(run_id).strip() + if not RUN_ID_RE.match(run_id) or set(run_id) == {"."}: + # A buyer can suggest the run id; "." / ".." would escape the + # per-run directory under the report root. + raise SystemExit("error: --run-id must match ^[A-Za-z0-9._-]+$ and not be dots-only (got %r)" % run_id) + + log("smoke probe: target=%s run-id=%s" % (target, run_id)) + + checks = [] + checks.append(check_skill_md(target)) + log(" [%s] skill-md: %s" % ("ok" if checks[-1]["ok"] else "FAIL", checks[-1]["detail"])) + + services_check, services = check_services_json(target) + checks.append(services_check) + log(" [%s] services-json: %s" % ("ok" if services_check["ok"] else "FAIL", services_check["detail"])) + + advertised = len(services) if services_check["ok"] else 0 + probed = 0 + if services_check["ok"] and services: + for service in sorted(services, key=lambda s: s["name"])[:MAX_SERVICES]: + check = check_service_402(target, service) + checks.append(check) + probed += 1 + log(" [%s] %s: %s" % ("ok" if check["ok"] else "FAIL", check["name"], check["detail"])) + + info = check_agent_registration(target) + checks.append(info) + log(" [%s] agent-registration (info): %s" % ("ok" if info["ok"] else "FAIL", info["detail"])) + + counted = [c for c in checks if not c.get("informational")] + passed = sum(1 for c in counted if c["ok"]) + total = len(counted) # always >= 2 (skill-md + services-json) + + results = { + "version": VERSION, + "target": target, + "runId": run_id, + "timestamp": 
time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "checks": checks, + "passed": passed, + "total": total, + "score255": (255 * passed) // total, + "score100": (100 * passed) // total, + "reportSha256": "", + "permalink": "", + } + + run_dir = os.path.join(out_base, host_slug(target), run_id) + os.makedirs(run_dir, exist_ok=True) + + report = build_report(results, probed, advertised) + report_bytes = report.encode("utf-8") + report_path = os.path.join(run_dir, "report.md") + with open(report_path, "wb") as fh: + fh.write(report_bytes) + # reportSha256 = sha256 over the EXACT bytes written to disk (the same + # bytes gh_post.py base64s into the GitHub PUT). Computed after the final + # report write, before results.json. + results["reportSha256"] = hashlib.sha256(report_bytes).hexdigest() + + with open(os.path.join(run_dir, "results.json"), "w", encoding="utf-8") as fh: + fh.write(json.dumps(results, indent=2) + "\n") + + log("run dir: %s" % os.path.abspath(run_dir)) + return results, run_dir + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + +def _load_gh_post(): + sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + import gh_post # noqa: E402 (sibling script in this skill) + + return gh_post + + +def cmd_probe(args): + results, _ = run_probe(args.target, args.run_id, args.out_dir) + print(json.dumps(results, indent=2)) + return 0 + + +def cmd_post(args): + gh_post = _load_gh_post() + try: + results, _, _ = gh_post.post_run(args.run_dir) + except gh_post.PostError as exc: + log("error: %s" % exc) + log("report remains local; re-run: python3 smoke.py post %s" % args.run_dir) + return 1 + print(json.dumps(results, indent=2)) + return 0 + + +def cmd_run(args): + results, run_dir = run_probe(args.target, args.run_id, args.out_dir) + if os.environ.get("GITHUB_TOKEN", "").strip() and os.environ.get("GITHUB_REPORT_REPO", "").strip(): 
+ gh_post = _load_gh_post() + try: + results, _, _ = gh_post.post_run(run_dir) + except gh_post.PostError as exc: + print(json.dumps(results, indent=2)) + log("error: %s" % exc) + log("report remains local; re-run: python3 smoke.py post %s" % run_dir) + return 1 + else: + log("GITHUB_TOKEN/GITHUB_REPORT_REPO not set; report kept local (no GitHub post)") + print(json.dumps(results, indent=2)) + return 0 + + +def main(argv=None): + parser = argparse.ArgumentParser( + prog="smoke.py", + description="Read-only smoke probe of an Obol Stack public surface (never pays, never signs).", + ) + sub = parser.add_subparsers(dest="command", required=True) + + p_probe = sub.add_parser("probe", help="probe checks only; writes report.md + results.json, no network writes") + p_probe.add_argument("target", nargs="?", help="target base URL, e.g. https:// or http://obol.stack:8080") + p_probe.add_argument("--target", dest="target_flag", help="alternative to the positional target") + p_probe.add_argument("--run-id", help="run identifier (^[A-Za-z0-9._-]+$); default -<6hex>") + p_probe.add_argument("--out-dir", default="./smoke", help="base output dir (default ./smoke)") + p_probe.set_defaults(func=cmd_probe) + + p_post = sub.add_parser("post", help="commit an existing run dir's report.md to GitHub (env GITHUB_TOKEN + GITHUB_REPORT_REPO)") + p_post.add_argument("run_dir", help="run dir written by probe, e.g. 
./smoke//") + p_post.set_defaults(func=cmd_post) + + p_run = sub.add_parser("run", help="probe + post one-shot (host/manual use; agents should run probe then post)") + p_run.add_argument("target", nargs="?", help="target base URL") + p_run.add_argument("--target", dest="target_flag", help="alternative to the positional target") + p_run.add_argument("--run-id", help="run identifier") + p_run.add_argument("--out-dir", default="./smoke", help="base output dir (default ./smoke)") + p_run.set_defaults(func=cmd_run) + + args = parser.parse_args(argv) + if hasattr(args, "target"): + target = args.target or getattr(args, "target_flag", None) + if not target: + parser.error("a target base URL is required (positional or --target)") + args.target = target + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/internal/enclave/enclave_darwin.go b/internal/enclave/enclave_darwin.go index 215fcd4a..db0c8479 100644 --- a/internal/enclave/enclave_darwin.go +++ b/internal/enclave/enclave_darwin.go @@ -344,8 +344,8 @@ func (k *seKey) Sign(digest []byte) ([]byte, error) { ) if n == 0 { msg := cfStringToGo(errStr) - if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } return nil, fmt.Errorf("enclave: Sign failed: %s", msg) } @@ -370,8 +370,8 @@ func (k *seKey) ECDH(peerPubKeyBytes []byte) ([]byte, error) { ) if n == 0 { msg := cfStringToGo(errStr) - if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } return nil, fmt.Errorf("enclave: ECDH failed: %s", msg) } @@ -413,14 +413,14 @@ func newKey(tag string) (Key, error) { var errStr C.CFStringRef privRef := C.create_se_key(ctag, C.int(1), &errCode, &errStr) //nolint:gocritic // CGo pointer arguments, not duplicate subexpressions - if unsafe.Pointer(privRef) != nil { + if privRef != 0 { // Success — key is in keychain. 
- if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } pub, err := extractPublicKey(privRef) if err != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(privRef))) + C.CFRelease(C.CFTypeRef(privRef)) return nil, err } return &seKey{privRef: privRef, tag: tag, pubKey: pub, persistent: true}, nil @@ -430,32 +430,32 @@ func newKey(tag string) (Key, error) { // fall back to an ephemeral key (dev/test use without code-signing). if C.int(errCode) != C.OBOL_ERR_SEC_MISSING_ENTITLEMENT { msg := cfStringToGo(errStr) - if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } return nil, fmt.Errorf("enclave: create_se_key (persistent): %s", msg) } - if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } // Ephemeral fallback. var errStr2 C.CFStringRef privRef = C.create_se_key(ctag, C.int(0), &errCode, &errStr2) //nolint:gocritic // CGo pointer arguments, not duplicate subexpressions - if unsafe.Pointer(privRef) == nil { + if privRef == 0 { msg := cfStringToGo(errStr2) - if unsafe.Pointer(errStr2) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr2))) + if errStr2 != 0 { + C.CFRelease(C.CFTypeRef(errStr2)) } return nil, fmt.Errorf("enclave: create_se_key (ephemeral fallback): %s", msg) } - if unsafe.Pointer(errStr2) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr2))) + if errStr2 != 0 { + C.CFRelease(C.CFTypeRef(errStr2)) } pub, err := extractPublicKey(privRef) if err != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(privRef))) + C.CFRelease(C.CFTypeRef(privRef)) return nil, err } k := &seKey{privRef: privRef, tag: tag, pubKey: pub, persistent: false} @@ -475,23 +475,23 @@ func loadKey(tag string) (Key, error) { privRef := C.load_se_key(ctag, &found, &errStr) //nolint:gocritic // CGo pointer arguments, not 
duplicate subexpressions if found == 0 { - if unsafe.Pointer(errStr) != nil { + if errStr != 0 { msg := cfStringToGo(errStr) - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + C.CFRelease(C.CFTypeRef(errStr)) return nil, fmt.Errorf("enclave: load_se_key: %s", msg) } return nil, ErrKeyNotFound } - if unsafe.Pointer(privRef) == nil { + if privRef == 0 { return nil, ErrKeyNotFound } - if unsafe.Pointer(errStr) != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(errStr))) + if errStr != 0 { + C.CFRelease(C.CFTypeRef(errStr)) } pub, err := extractPublicKey(privRef) if err != nil { - C.CFRelease(C.CFTypeRef(unsafe.Pointer(privRef))) + C.CFRelease(C.CFTypeRef(privRef)) return nil, err } @@ -590,7 +590,7 @@ func extractPublicKey(privRef C.SecKeyRef) ([]byte, error) { // cfStringToGo converts a CFStringRef to a Go string. func cfStringToGo(s C.CFStringRef) string { - if unsafe.Pointer(s) == nil { + if s == 0 { return "(no error description)" } cstr := C.cfstring_to_c(s) diff --git a/internal/enclave/enclave_stub.go b/internal/enclave/enclave_stub.go index b3c46460..fc38d7ca 100644 --- a/internal/enclave/enclave_stub.go +++ b/internal/enclave/enclave_stub.go @@ -5,16 +5,16 @@ package enclave // stubKey satisfies the Key interface on unsupported platforms. 
type stubKey struct{ tag string } -func (s *stubKey) PublicKeyBytes() []byte { return nil } -func (s *stubKey) Sign(_ []byte) ([]byte, error) { return nil, ErrNotSupported } -func (s *stubKey) ECDH(_ []byte) ([]byte, error) { return nil, ErrNotSupported } -func (s *stubKey) Decrypt(_ []byte) ([]byte, error) { return nil, ErrNotSupported } -func (s *stubKey) Tag() string { return s.tag } -func (s *stubKey) Persistent() bool { return false } -func (s *stubKey) Delete() error { return ErrNotSupported } +func (s *stubKey) PublicKeyBytes() []byte { return nil } +func (s *stubKey) Sign(_ []byte) ([]byte, error) { return nil, ErrNotSupported } +func (s *stubKey) ECDH(_ []byte) ([]byte, error) { return nil, ErrNotSupported } +func (s *stubKey) Decrypt(_ []byte) ([]byte, error) { return nil, ErrNotSupported } +func (s *stubKey) Tag() string { return s.tag } +func (s *stubKey) Persistent() bool { return false } +func (s *stubKey) Delete() error { return ErrNotSupported } -func newKey(_ string) (Key, error) { return nil, ErrNotSupported } -func loadKey(_ string) (Key, error) { return nil, ErrNotSupported } -func deleteKey(_ string) error { return ErrNotSupported } -func checkSIP() error { return ErrNotSupported } -func decrypt(_ string, _ []byte) ([]byte, error) { return nil, ErrNotSupported } +func newKey(_ string) (Key, error) { return nil, ErrNotSupported } +func loadKey(_ string) (Key, error) { return nil, ErrNotSupported } +func deleteKey(_ string) error { return ErrNotSupported } +func checkSIP() error { return ErrNotSupported } +func decrypt(_ string, _ []byte) ([]byte, error) { return nil, ErrNotSupported } diff --git a/internal/erc8004/abi.go b/internal/erc8004/abi.go index 7598181c..be1de094 100644 --- a/internal/erc8004/abi.go +++ b/internal/erc8004/abi.go @@ -16,7 +16,13 @@ const ( // ReputationRegistryBaseSepolia is the ERC-8004 Reputation Registry on Base Sepolia. 
ReputationRegistryBaseSepolia = "0x8004B663056A597Dffe9eCcC1965A193B7388713" - // ValidationRegistryBaseSepolia is the ERC-8004 Validation Registry on Base Sepolia. + // ValidationRegistryBaseSepolia is the v1.0.0 ERC-8004 Validation Registry + // address from the pre-v2 draft. + // + // Deprecated: this address has NO CODE on Base Sepolia (it was an Ethereum + // Sepolia deployment). Use ValidationRegistryAddress(network) / + // ValidationRegistryV2BaseSepolia (validation.go) — verified on-chain, + // getVersion()=="2.0.0". ValidationRegistryBaseSepolia = "0x8004CB39f29c09145F24Ad9dDe2A108C1A2cdfC5" // DefaultRPCBase is the default JSON-RPC base URL the controller uses to diff --git a/internal/erc8004/bounty.go b/internal/erc8004/bounty.go new file mode 100644 index 00000000..b84d4b5a --- /dev/null +++ b/internal/erc8004/bounty.go @@ -0,0 +1,26 @@ +// ServiceBounty ↔ ERC-8004 grounding: the eval-request hash binds an +// evaluator's on-chain validationResponse to one specific bounty + evaluator +// pair, so an annotation-level reveal can be checked against a chain-anchored +// entry (a "grounded" verdict). + +package erc8004 + +import ( + "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// bountyEvalDomain is the versioned domain prefix for bounty eval-request +// hashes. Changing it is a breaking change for every grounded verdict. +const bountyEvalDomain = "obol/bounty-eval/v1" + +// BountyEvalRequestHash derives the ERC-8004 validation request hash for one +// (bounty, evaluator) pair: keccak256 of the exact ASCII bytes +// "obol/bounty-eval/v1||". The CLI (evaluator +// side, submitting validationResponse) and the controller (grounding side, +// matching chain entries) MUST compute this identically. 
+func BountyEvalRequestHash(bountyUID, evaluator string) common.Hash { + return crypto.Keccak256Hash([]byte(bountyEvalDomain + "|" + bountyUID + "|" + strings.ToLower(evaluator))) +} diff --git a/internal/erc8004/bounty_test.go b/internal/erc8004/bounty_test.go new file mode 100644 index 00000000..1f4af417 --- /dev/null +++ b/internal/erc8004/bounty_test.go @@ -0,0 +1,35 @@ +package erc8004 + +import "testing" + +// TestBountyEvalRequestHash_Golden pins the exact preimage layout +// ("obol/bounty-eval/v1||"). The CLI signs +// validationResponses against this hash and the controller grounds verdicts +// by matching it on-chain — any drift silently breaks grounding, so the +// vector is hardcoded, not recomputed. +func TestBountyEvalRequestHash_Golden(t *testing.T) { + const ( + bountyUID = "8b9af0d4-9c3e-4a64-b1d0-2f50f2a1c111" + evaluator = "0xAbCdEf0123456789aBcDeF0123456789AbCdEf01" + golden = "0x22683f2360f35f41b5e5122865e048bb0dcb3b7896fc7280545fb09fbfdfa51a" + ) + + if got := BountyEvalRequestHash(bountyUID, evaluator).Hex(); got != golden { + t.Errorf("BountyEvalRequestHash = %s, want %s", got, golden) + } + + // The evaluator address is lowercased into the preimage: checksummed and + // lowercase forms of the same address must ground identically. + lower := BountyEvalRequestHash(bountyUID, "0xabcdef0123456789abcdef0123456789abcdef01") + if lower.Hex() != golden { + t.Errorf("lowercase evaluator hash = %s, want %s (address must be case-insensitive)", lower.Hex(), golden) + } + + // Different bounty or evaluator must never collide with the golden pair. 
+ if BountyEvalRequestHash("other-uid", evaluator).Hex() == golden { + t.Error("different bountyUID produced the golden hash") + } + if BountyEvalRequestHash(bountyUID, "0x0000000000000000000000000000000000000001").Hex() == golden { + t.Error("different evaluator produced the golden hash") + } +} diff --git a/internal/erc8004/calldata.go b/internal/erc8004/calldata.go new file mode 100644 index 00000000..f2070bb0 --- /dev/null +++ b/internal/erc8004/calldata.go @@ -0,0 +1,99 @@ +// Identity Registry calldata builders (calldata-printer pattern). +// +// The transact path for setMetadata exists on Client +// (SetMetadataWithOpts), but agent operators frequently hold the +// registration key in a wallet the CLI never sees. These encoders build +// the raw to+data pair so the CLI can print it and the OPERATOR submits +// with their own wallet — the controller NEVER signs. + +package erc8004 + +import ( + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" +) + +var ( + identityABIOnce sync.Once + identityABIParsed abi.ABI + identityABIErr error +) + +// identityABI lazily parses the embedded Identity Registry ABI once. +// Client.newClient keeps its own parse (it predates this helper and +// owns a bound contract); encoders share this copy. +func identityABI() (abi.ABI, error) { + identityABIOnce.Do(func() { + identityABIParsed, identityABIErr = abi.JSON(strings.NewReader(identityRegistryABI)) + }) + if identityABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse identity registry abi: %w", identityABIErr) + } + return identityABIParsed, nil +} + +// EncodeSetMetadata builds calldata for +// setMetadata(uint256 agentId, string metadataKey, bytes metadataValue) +// on the ERC-8004 Identity Registry. Must be submitted by the agent +// owner's wallet. 
Note the registry's reference implementation reverts +// when the new value equals the stored value (see SetMetadataWithOpts), +// so re-submitting an unchanged hash fails on-chain as a no-op guard. +func EncodeSetMetadata(agentID *big.Int, key string, value []byte) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if strings.TrimSpace(key) == "" { + return nil, fmt.Errorf("erc8004: metadata key must not be empty") + } + + parsed, err := identityABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("setMetadata", agentID, key, value) + if err != nil { + return nil, fmt.Errorf("erc8004: pack setMetadata: %w", err) + } + return data, nil +} + +// SetMetadataCall is the decoded argument set of a setMetadata call. +type SetMetadataCall struct { + AgentID *big.Int + Key string + Value []byte +} + +// DecodeSetMetadataCalldata decodes setMetadata calldata (selector + +// ABI-encoded args). Useful for provenance checks on observed +// transactions and for tests. 
+func DecodeSetMetadataCalldata(data []byte) (SetMetadataCall, error) { + parsed, err := identityABI() + if err != nil { + return SetMetadataCall{}, err + } + values, err := unpackCalldata(parsed, "setMetadata", data) + if err != nil { + return SetMetadataCall{}, err + } + if len(values) != 3 { + return SetMetadataCall{}, fmt.Errorf("erc8004: setMetadata arg count = %d, want 3", len(values)) + } + + out := SetMetadataCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return SetMetadataCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.Key, ok = values[1].(string); !ok { + return SetMetadataCall{}, fmt.Errorf("erc8004: metadataKey type = %T", values[1]) + } + if out.Value, ok = values[2].([]byte); !ok { + return SetMetadataCall{}, fmt.Errorf("erc8004: metadataValue type = %T", values[2]) + } + return out, nil +} diff --git a/internal/erc8004/networks_test.go b/internal/erc8004/networks_test.go index 034bd7d6..e8eb670a 100644 --- a/internal/erc8004/networks_test.go +++ b/internal/erc8004/networks_test.go @@ -41,7 +41,7 @@ func TestResolveNetworks(t *testing.T) { {"base-sepolia", 1, false}, {"mainnet,base", 2, false}, {"base-sepolia,base,ethereum", 3, false}, - {"base,base", 1, false}, // deduplicate + {"base,base", 1, false}, // deduplicate {"mainnet,ethereum", 1, false}, // same network, different aliases {"", 0, true}, {"unknown", 0, true}, diff --git a/internal/erc8004/reputation.go b/internal/erc8004/reputation.go new file mode 100644 index 00000000..a9765289 --- /dev/null +++ b/internal/erc8004/reputation.go @@ -0,0 +1,437 @@ +package erc8004 + +// ERC-8004 Reputation Registry (v2.0.0) calldata builders and read helpers. +// +// IMPORTANT — signing model: the serviceoffer/servicebounty controller NEVER +// signs feedback transactions. Client agents submit giveFeedback (and +// revokeFeedback) with THEIR OWN wallets; agent operators submit +// appendResponse with theirs. 
This package only builds calldata and reads +// recorded feedback. +// +// Function signatures verified against: +// - Spec: https://eips.ethereum.org/EIPS/eip-8004 (Reputation Registry) +// - Reference impl + official ABI: +// https://github.com/erc-8004/erc-8004-contracts +// (abis/ReputationRegistry.json, contracts/ReputationRegistryUpgradeable.sol, +// getVersion() == "2.0.0") +// +// giveFeedback(uint256 agentId, int128 value, uint8 valueDecimals, string tag1, string tag2, string endpoint, string feedbackURI, bytes32 feedbackHash) +// revokeFeedback(uint256 agentId, uint64 feedbackIndex) +// appendResponse(uint256 agentId, address clientAddress, uint64 feedbackIndex, string responseURI, bytes32 responseHash) +// getSummary(uint256 agentId, address[] clientAddresses, string tag1, string tag2) -> (uint64 count, int128 summaryValue, uint8 summaryValueDecimals) +// readFeedback(uint256 agentId, address clientAddress, uint64 feedbackIndex) -> (int128, uint8, string, string, bool) +// getLastIndex(uint256 agentId, address clientAddress) -> uint64 +// getClients(uint256 agentId) -> address[] + +import ( + "context" + _ "embed" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" +) + +//go:embed reputation_registry.abi.json +var reputationRegistryABI string + +// ReputationRegistryMainnet is the ERC-8004 v2.0.0 Reputation Registry on +// Ethereum mainnet and Base mainnet (deployed at the same address via +// CREATE2). The Base Sepolia deployment is the existing +// ReputationRegistryBaseSepolia constant in abi.go. +// Source: https://github.com/erc-8004/erc-8004-contracts README + +// scripts/addresses.ts; on-chain: code present on both chains, +// getVersion() == "2.0.0". 
+const ReputationRegistryMainnet = "0x8004BAa17C55a88189AE136b182e5fdA19dE9b63" + +// MaxFeedbackValueDecimals is the maximum valueDecimals accepted by +// giveFeedback. The contract reverts with "too many decimals" above this. +const MaxFeedbackValueDecimals = 18 + +// maxFeedbackAbsValue mirrors the contract's MAX_ABS_VALUE = 1e38 bound on +// the int128 feedback value. +var maxFeedbackAbsValue = new(big.Int).Exp(big.NewInt(10), big.NewInt(38), nil) + +var ( + reputationABIOnce sync.Once + reputationABIParsed abi.ABI + reputationABIErr error +) + +// reputationABI lazily parses the embedded Reputation Registry ABI once. +func reputationABI() (abi.ABI, error) { + reputationABIOnce.Do(func() { + reputationABIParsed, reputationABIErr = abi.JSON(strings.NewReader(reputationRegistryABI)) + }) + if reputationABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse reputation registry abi: %w", reputationABIErr) + } + return reputationABIParsed, nil +} + +// ReputationRegistryAddress maps a supported network name to the deployed +// ERC-8004 v2.0.0 Reputation Registry address. It accepts the same aliases +// as ResolveNetwork. Networks without an on-chain-verified deployment return +// an error rather than a guessed address. +func ReputationRegistryAddress(network string) (string, error) { + net, err := ResolveNetwork(network) + if err != nil { + return "", fmt.Errorf("erc8004: reputation registry: %w", err) + } + switch net.Name { + case BaseSepolia.Name: + return ReputationRegistryBaseSepolia, nil + case Base.Name, Ethereum.Name: + return ReputationRegistryMainnet, nil + default: + return "", fmt.Errorf("erc8004: no verified reputation registry deployment for network %q", net.Name) + } +} + +// EncodeGiveFeedback builds calldata for +// giveFeedback(uint256,int128,uint8,string,string,string,string,bytes32). +// value is a fixed-point score scaled by 10^valueDecimals (|value| <= 1e38, +// valueDecimals <= 18). 
The transaction must be submitted by the client +// agent's own wallet — the contract forbids self-feedback from the agent's +// owner/operators, and the controller never signs. tag1, tag2, endpoint, +// feedbackURI, and feedbackHash are optional per spec and may be zero values. +func EncodeGiveFeedback(agentID *big.Int, value *big.Int, valueDecimals uint8, tag1, tag2, endpoint, feedbackURI string, feedbackHash common.Hash) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if value == nil { + return nil, fmt.Errorf("erc8004: feedback value must not be nil") + } + if value.CmpAbs(maxFeedbackAbsValue) > 0 { + return nil, fmt.Errorf("erc8004: feedback value %s out of range [-1e38, 1e38]", value) + } + if valueDecimals > MaxFeedbackValueDecimals { + return nil, fmt.Errorf("erc8004: valueDecimals %d out of range [0,%d]", valueDecimals, MaxFeedbackValueDecimals) + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("giveFeedback", agentID, value, valueDecimals, tag1, tag2, endpoint, feedbackURI, feedbackHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack giveFeedback: %w", err) + } + return data, nil +} + +// EncodeRevokeFeedback builds calldata for revokeFeedback(uint256,uint64). +// Must be submitted by the wallet that gave the feedback. Feedback indices +// are 1-based. 
+func EncodeRevokeFeedback(agentID *big.Int, feedbackIndex uint64) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if feedbackIndex == 0 { + return nil, fmt.Errorf("erc8004: feedbackIndex must be > 0 (indices are 1-based)") + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("revokeFeedback", agentID, feedbackIndex) + if err != nil { + return nil, fmt.Errorf("erc8004: pack revokeFeedback: %w", err) + } + return data, nil +} + +// EncodeAppendResponse builds calldata for +// appendResponse(uint256,address,uint64,string,bytes32) — an on-chain reply +// to existing feedback. Submitted by the responder's own wallet. +func EncodeAppendResponse(agentID *big.Int, clientAddress common.Address, feedbackIndex uint64, responseURI string, responseHash common.Hash) ([]byte, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if clientAddress == (common.Address{}) { + return nil, fmt.Errorf("erc8004: clientAddress must not be the zero address") + } + if feedbackIndex == 0 { + return nil, fmt.Errorf("erc8004: feedbackIndex must be > 0 (indices are 1-based)") + } + if responseURI == "" { + return nil, fmt.Errorf("erc8004: responseURI must not be empty") + } + + parsed, err := reputationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("appendResponse", agentID, clientAddress, feedbackIndex, responseURI, responseHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack appendResponse: %w", err) + } + return data, nil +} + +// GiveFeedbackCall is the decoded argument set of a giveFeedback call. +type GiveFeedbackCall struct { + AgentID *big.Int + Value *big.Int + ValueDecimals uint8 + Tag1 string + Tag2 string + Endpoint string + FeedbackURI string + FeedbackHash common.Hash +} + +// DecodeGiveFeedbackCalldata decodes giveFeedback calldata (selector + +// ABI-encoded args). 
Useful for provenance checks on observed transactions +// and for tests. +func DecodeGiveFeedbackCalldata(data []byte) (GiveFeedbackCall, error) { + parsed, err := reputationABI() + if err != nil { + return GiveFeedbackCall{}, err + } + values, err := unpackCalldata(parsed, "giveFeedback", data) + if err != nil { + return GiveFeedbackCall{}, err + } + if len(values) != 8 { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: giveFeedback arg count = %d, want 8", len(values)) + } + + out := GiveFeedbackCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.Value, ok = values[1].(*big.Int); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: value type = %T", values[1]) + } + if out.ValueDecimals, ok = values[2].(uint8); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: valueDecimals type = %T", values[2]) + } + if out.Tag1, ok = values[3].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: tag1 type = %T", values[3]) + } + if out.Tag2, ok = values[4].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: tag2 type = %T", values[4]) + } + if out.Endpoint, ok = values[5].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: endpoint type = %T", values[5]) + } + if out.FeedbackURI, ok = values[6].(string); !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: feedbackURI type = %T", values[6]) + } + hash, ok := values[7].([32]byte) + if !ok { + return GiveFeedbackCall{}, fmt.Errorf("erc8004: feedbackHash type = %T", values[7]) + } + out.FeedbackHash = common.Hash(hash) + return out, nil +} + +// RevokeFeedbackCall is the decoded argument set of a revokeFeedback call. +type RevokeFeedbackCall struct { + AgentID *big.Int + FeedbackIndex uint64 +} + +// DecodeRevokeFeedbackCalldata decodes revokeFeedback calldata. 
+func DecodeRevokeFeedbackCalldata(data []byte) (RevokeFeedbackCall, error) { + parsed, err := reputationABI() + if err != nil { + return RevokeFeedbackCall{}, err + } + values, err := unpackCalldata(parsed, "revokeFeedback", data) + if err != nil { + return RevokeFeedbackCall{}, err + } + if len(values) != 2 { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: revokeFeedback arg count = %d, want 2", len(values)) + } + + out := RevokeFeedbackCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.FeedbackIndex, ok = values[1].(uint64); !ok { + return RevokeFeedbackCall{}, fmt.Errorf("erc8004: feedbackIndex type = %T", values[1]) + } + return out, nil +} + +// AppendResponseCall is the decoded argument set of an appendResponse call. +type AppendResponseCall struct { + AgentID *big.Int + ClientAddress common.Address + FeedbackIndex uint64 + ResponseURI string + ResponseHash common.Hash +} + +// DecodeAppendResponseCalldata decodes appendResponse calldata. 
+func DecodeAppendResponseCalldata(data []byte) (AppendResponseCall, error) { + parsed, err := reputationABI() + if err != nil { + return AppendResponseCall{}, err + } + values, err := unpackCalldata(parsed, "appendResponse", data) + if err != nil { + return AppendResponseCall{}, err + } + if len(values) != 5 { + return AppendResponseCall{}, fmt.Errorf("erc8004: appendResponse arg count = %d, want 5", len(values)) + } + + out := AppendResponseCall{} + var ok bool + if out.AgentID, ok = values[0].(*big.Int); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: agentId type = %T", values[0]) + } + if out.ClientAddress, ok = values[1].(common.Address); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: clientAddress type = %T", values[1]) + } + if out.FeedbackIndex, ok = values[2].(uint64); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: feedbackIndex type = %T", values[2]) + } + if out.ResponseURI, ok = values[3].(string); !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: responseURI type = %T", values[3]) + } + hash, ok := values[4].([32]byte) + if !ok { + return AppendResponseCall{}, fmt.Errorf("erc8004: responseHash type = %T", values[4]) + } + out.ResponseHash = common.Hash(hash) + return out, nil +} + +// FeedbackSummary mirrors the reputation getSummary return values. The +// aggregate score is SummaryValue scaled by 10^-SummaryValueDecimals. +type FeedbackSummary struct { + Count uint64 + SummaryValue *big.Int + SummaryValueDecimals uint8 +} + +// FeedbackEntry mirrors readFeedback return values. +type FeedbackEntry struct { + Value *big.Int + ValueDecimals uint8 + Tag1 string + Tag2 string + IsRevoked bool +} + +// ReputationReader provides read-only access to a Reputation Registry. The +// controller uses it to observe recorded feedback; it holds no signer. +type ReputationReader struct { + contract *bind.BoundContract +} + +// NewReputationReader binds a read-only Reputation Registry at +// registryAddress. 
caller is typically (*erc8004.Client).ETH() or any +// *ethclient.Client. +func NewReputationReader(caller bind.ContractCaller, registryAddress string) (*ReputationReader, error) { + if caller == nil { + return nil, fmt.Errorf("erc8004: reputation reader: caller must not be nil") + } + if !common.IsHexAddress(registryAddress) { + return nil, fmt.Errorf("erc8004: reputation reader: invalid registry address %q", registryAddress) + } + parsed, err := reputationABI() + if err != nil { + return nil, err + } + return &ReputationReader{ + contract: bind.NewBoundContract(common.HexToAddress(registryAddress), parsed, caller, nil, nil), + }, nil +} + +// Summary reads getSummary(agentId, clientAddresses, tag1, tag2). +func (r *ReputationReader) Summary(ctx context.Context, agentID *big.Int, clientAddresses []common.Address, tag1, tag2 string) (FeedbackSummary, error) { + if err := checkAgentID(agentID); err != nil { + return FeedbackSummary{}, err + } + if clientAddresses == nil { + clientAddresses = []common.Address{} + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getSummary", agentID, clientAddresses, tag1, tag2); err != nil { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary: %w", err) + } + if len(out) != 3 { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary returned %d values, want 3", len(out)) + } + + summary := FeedbackSummary{} + var ok bool + if summary.Count, ok = out[0].(uint64); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary count type = %T", out[0]) + } + if summary.SummaryValue, ok = out[1].(*big.Int); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary summaryValue type = %T", out[1]) + } + if summary.SummaryValueDecimals, ok = out[2].(uint8); !ok { + return FeedbackSummary{}, fmt.Errorf("erc8004: reputation getSummary summaryValueDecimals type = %T", out[2]) + } + return summary, nil +} + +// ReadFeedback reads 
readFeedback(agentId, clientAddress, feedbackIndex). +// Feedback indices are 1-based. +func (r *ReputationReader) ReadFeedback(ctx context.Context, agentID *big.Int, clientAddress common.Address, feedbackIndex uint64) (FeedbackEntry, error) { + if err := checkAgentID(agentID); err != nil { + return FeedbackEntry{}, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "readFeedback", agentID, clientAddress, feedbackIndex); err != nil { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback: %w", err) + } + if len(out) != 5 { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback returned %d values, want 5", len(out)) + } + + entry := FeedbackEntry{} + var ok bool + if entry.Value, ok = out[0].(*big.Int); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback value type = %T", out[0]) + } + if entry.ValueDecimals, ok = out[1].(uint8); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback valueDecimals type = %T", out[1]) + } + if entry.Tag1, ok = out[2].(string); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback tag1 type = %T", out[2]) + } + if entry.Tag2, ok = out[3].(string); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback tag2 type = %T", out[3]) + } + if entry.IsRevoked, ok = out[4].(bool); !ok { + return FeedbackEntry{}, fmt.Errorf("erc8004: readFeedback isRevoked type = %T", out[4]) + } + return entry, nil +} + +// LastIndex reads getLastIndex(agentId, clientAddress) — the most recent +// (1-based) feedback index the client has submitted for the agent; 0 when +// none. 
+func (r *ReputationReader) LastIndex(ctx context.Context, agentID *big.Int, clientAddress common.Address) (uint64, error) { + if err := checkAgentID(agentID); err != nil { + return 0, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getLastIndex", agentID, clientAddress); err != nil { + return 0, fmt.Errorf("erc8004: getLastIndex: %w", err) + } + if len(out) != 1 { + return 0, fmt.Errorf("erc8004: getLastIndex returned %d values, want 1", len(out)) + } + idx, ok := out[0].(uint64) + if !ok { + return 0, fmt.Errorf("erc8004: getLastIndex type = %T", out[0]) + } + return idx, nil +} diff --git a/internal/erc8004/reputation_registry.abi.json b/internal/erc8004/reputation_registry.abi.json new file mode 100644 index 00000000..9948315b --- /dev/null +++ b/internal/erc8004/reputation_registry.abi.json @@ -0,0 +1,391 @@ +[ + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "internalType": "string", + "name": "endpoint", + "type": "string" + }, + { + "internalType": "string", + "name": "feedbackURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "feedbackHash", + "type": "bytes32" + } + ], + "name": "giveFeedback", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "revokeFeedback", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + 
"type": "uint256" + }, + { + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + } + ], + "name": "appendResponse", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address[]", + "name": "clientAddresses", + "type": "address[]" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + } + ], + "name": "getSummary", + "outputs": [ + { + "internalType": "uint64", + "name": "count", + "type": "uint64" + }, + { + "internalType": "int128", + "name": "summaryValue", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "summaryValueDecimals", + "type": "uint8" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "readFeedback", + "outputs": [ + { + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "internalType": "bool", + "name": "isRevoked", + "type": "bool" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + 
"internalType": "address", + "name": "clientAddress", + "type": "address" + } + ], + "name": "getLastIndex", + "outputs": [ + { + "internalType": "uint64", + "name": "", + "type": "uint64" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + } + ], + "name": "getClients", + "outputs": [ + { + "internalType": "address[]", + "name": "", + "type": "address[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [], + "name": "getIdentityRegistry", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "indexed": false, + "internalType": "int128", + "name": "value", + "type": "int128" + }, + { + "indexed": false, + "internalType": "uint8", + "name": "valueDecimals", + "type": "uint8" + }, + { + "indexed": true, + "internalType": "string", + "name": "indexedTag1", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag1", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag2", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "endpoint", + "type": "string" + }, + { + "indexed": false, + "internalType": "string", + "name": "feedbackURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "feedbackHash", + "type": "bytes32" + } + ], + "name": "NewFeedback", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": 
"uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + } + ], + "name": "FeedbackRevoked", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "address", + "name": "clientAddress", + "type": "address" + }, + { + "indexed": false, + "internalType": "uint64", + "name": "feedbackIndex", + "type": "uint64" + }, + { + "indexed": true, + "internalType": "address", + "name": "responder", + "type": "address" + }, + { + "indexed": false, + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + } + ], + "name": "ResponseAppended", + "type": "event" + } +] diff --git a/internal/erc8004/reputation_test.go b/internal/erc8004/reputation_test.go new file mode 100644 index 00000000..a225b5e0 --- /dev/null +++ b/internal/erc8004/reputation_test.go @@ -0,0 +1,410 @@ +package erc8004 + +import ( + "context" + "encoding/hex" + "math/big" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestReputationABI_Parses(t *testing.T) { + if _, err := reputationABI(); err != nil { + t.Fatalf("embedded reputation ABI failed to parse: %v", err) + } +} + +// TestReputationABI_SelectorGoldenValues pins the 4-byte selectors of the +// verified v2.0.0 signatures (spec: https://eips.ethereum.org/EIPS/eip-8004; +// ABI: https://github.com/erc-8004/erc-8004-contracts). Each golden value is +// cross-checked against keccak256 of the canonical signature string and the +// parsed ABI method. 
+func TestReputationABI_SelectorGoldenValues(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + + tests := []struct { + method string + sig string + selector string + }{ + {"giveFeedback", "giveFeedback(uint256,int128,uint8,string,string,string,string,bytes32)", "3c036a7e"}, + {"revokeFeedback", "revokeFeedback(uint256,uint64)", "4ab3ca99"}, + {"appendResponse", "appendResponse(uint256,address,uint64,string,bytes32)", "c2349ab2"}, + {"getSummary", "getSummary(uint256,address[],string,string)", "81bbba58"}, + {"readFeedback", "readFeedback(uint256,address,uint64)", "232b0810"}, + {"getLastIndex", "getLastIndex(uint256,address)", "f2d81759"}, + {"getClients", "getClients(uint256)", "42dd519c"}, + {"getIdentityRegistry", "getIdentityRegistry()", "bc4d861b"}, + } + + for _, tt := range tests { + t.Run(tt.method, func(t *testing.T) { + m, ok := parsed.Methods[tt.method] + if !ok { + t.Fatalf("method %q missing from parsed ABI", tt.method) + } + if m.Sig != tt.sig { + t.Errorf("signature = %q, want %q", m.Sig, tt.sig) + } + if got := hex.EncodeToString(m.ID); got != tt.selector { + t.Errorf("parsed selector = 0x%s, want 0x%s", got, tt.selector) + } + if got := hex.EncodeToString(crypto.Keccak256([]byte(tt.sig))[:4]); got != tt.selector { + t.Errorf("keccak256(%q)[:4] = 0x%s, want 0x%s", tt.sig, got, tt.selector) + } + }) + } +} + +func TestReputationABI_EventsPresent(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + for _, name := range []string{"NewFeedback", "FeedbackRevoked", "ResponseAppended"} { + if _, ok := parsed.Events[name]; !ok { + t.Errorf("missing event %q in parsed ABI", name) + } + } +} + +func TestEncodeGiveFeedback_RoundTrip(t *testing.T) { + agentID := big.NewInt(42) + value := big.NewInt(-875) // -87.5 with valueDecimals=1 + feedbackHash := crypto.Keccak256Hash([]byte("feedback payload")) + + data, err := EncodeGiveFeedback(agentID, value, 1, "code-review", "go", 
"https://agent.example/v1", "ipfs://bafy.../fb.json", feedbackHash) + if err != nil { + t.Fatalf("EncodeGiveFeedback: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "3c036a7e" { + t.Errorf("selector = 0x%s, want 0x3c036a7e", got) + } + + decoded, err := DecodeGiveFeedbackCalldata(data) + if err != nil { + t.Fatalf("DecodeGiveFeedbackCalldata: %v", err) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", decoded.AgentID, agentID) + } + if decoded.Value.Cmp(value) != 0 { + t.Errorf("value = %s, want %s", decoded.Value, value) + } + if decoded.ValueDecimals != 1 { + t.Errorf("valueDecimals = %d, want 1", decoded.ValueDecimals) + } + if decoded.Tag1 != "code-review" || decoded.Tag2 != "go" { + t.Errorf("tags = (%q, %q), want (code-review, go)", decoded.Tag1, decoded.Tag2) + } + if decoded.Endpoint != "https://agent.example/v1" { + t.Errorf("endpoint = %q", decoded.Endpoint) + } + if decoded.FeedbackURI != "ipfs://bafy.../fb.json" { + t.Errorf("feedbackURI = %q", decoded.FeedbackURI) + } + if decoded.FeedbackHash != feedbackHash { + t.Errorf("feedbackHash = %s, want %s", decoded.FeedbackHash, feedbackHash) + } +} + +func TestEncodeRevokeFeedback_RoundTrip(t *testing.T) { + data, err := EncodeRevokeFeedback(big.NewInt(42), 7) + if err != nil { + t.Fatalf("EncodeRevokeFeedback: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "4ab3ca99" { + t.Errorf("selector = 0x%s, want 0x4ab3ca99", got) + } + + decoded, err := DecodeRevokeFeedbackCalldata(data) + if err != nil { + t.Fatalf("DecodeRevokeFeedbackCalldata: %v", err) + } + if decoded.AgentID.Cmp(big.NewInt(42)) != 0 || decoded.FeedbackIndex != 7 { + t.Errorf("decoded = %+v, want agentId=42 feedbackIndex=7", decoded) + } +} + +func TestEncodeAppendResponse_RoundTrip(t *testing.T) { + client := common.HexToAddress("0x4444444444444444444444444444444444444444") + respHash := crypto.Keccak256Hash([]byte("response payload")) + + data, err := 
EncodeAppendResponse(big.NewInt(42), client, 7, "ipfs://bafy.../resp.json", respHash) + if err != nil { + t.Fatalf("EncodeAppendResponse: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "c2349ab2" { + t.Errorf("selector = 0x%s, want 0xc2349ab2", got) + } + + decoded, err := DecodeAppendResponseCalldata(data) + if err != nil { + t.Fatalf("DecodeAppendResponseCalldata: %v", err) + } + if decoded.AgentID.Cmp(big.NewInt(42)) != 0 { + t.Errorf("agentId = %s, want 42", decoded.AgentID) + } + if decoded.ClientAddress != client { + t.Errorf("clientAddress = %s, want %s", decoded.ClientAddress, client) + } + if decoded.FeedbackIndex != 7 { + t.Errorf("feedbackIndex = %d, want 7", decoded.FeedbackIndex) + } + if decoded.ResponseURI != "ipfs://bafy.../resp.json" { + t.Errorf("responseURI = %q", decoded.ResponseURI) + } + if decoded.ResponseHash != respHash { + t.Errorf("responseHash = %s, want %s", decoded.ResponseHash, respHash) + } +} + +func TestEncodeGiveFeedback_BadInput(t *testing.T) { + hash := crypto.Keccak256Hash([]byte("x")) + overMax := new(big.Int).Add(maxFeedbackAbsValue, big.NewInt(1)) + underMin := new(big.Int).Neg(overMax) + + tests := []struct { + name string + fn func() ([]byte, error) + }{ + {"nil agentId", func() ([]byte, error) { + return EncodeGiveFeedback(nil, big.NewInt(1), 0, "", "", "", "", hash) + }}, + {"negative agentId", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(-1), big.NewInt(1), 0, "", "", "", "", hash) + }}, + {"nil value", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), nil, 0, "", "", "", "", hash) + }}, + {"value over 1e38", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), overMax, 0, "", "", "", "", hash) + }}, + {"value under -1e38", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), underMin, 0, "", "", "", "", hash) + }}, + {"valueDecimals 19", func() ([]byte, error) { + return EncodeGiveFeedback(big.NewInt(1), big.NewInt(1), 19, "", "", "", 
"", hash) + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := tt.fn(); err == nil { + t.Error("expected error, got nil") + } + }) + } + + // Boundary values must be accepted. + if _, err := EncodeGiveFeedback(big.NewInt(1), maxFeedbackAbsValue, MaxFeedbackValueDecimals, "", "", "", "", common.Hash{}); err != nil { + t.Errorf("value 1e38, decimals 18 should be accepted: %v", err) + } + if _, err := EncodeGiveFeedback(big.NewInt(1), new(big.Int).Neg(maxFeedbackAbsValue), 0, "", "", "", "", common.Hash{}); err != nil { + t.Errorf("value -1e38 should be accepted: %v", err) + } +} + +func TestEncodeRevokeFeedback_BadInput(t *testing.T) { + if _, err := EncodeRevokeFeedback(nil, 1); err == nil { + t.Error("nil agentId: expected error") + } + if _, err := EncodeRevokeFeedback(big.NewInt(1), 0); err == nil { + t.Error("feedbackIndex 0: expected error") + } +} + +func TestEncodeAppendResponse_BadInput(t *testing.T) { + client := common.HexToAddress("0x4444444444444444444444444444444444444444") + if _, err := EncodeAppendResponse(nil, client, 1, "u", common.Hash{}); err == nil { + t.Error("nil agentId: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), common.Address{}, 1, "u", common.Hash{}); err == nil { + t.Error("zero clientAddress: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), client, 0, "u", common.Hash{}); err == nil { + t.Error("feedbackIndex 0: expected error") + } + if _, err := EncodeAppendResponse(big.NewInt(1), client, 1, "", common.Hash{}); err == nil { + t.Error("empty responseURI: expected error") + } +} + +func TestDecodeReputationCalldata_Errors(t *testing.T) { + t.Run("too short", func(t *testing.T) { + if _, err := DecodeGiveFeedbackCalldata([]byte{0x3c}); err == nil { + t.Error("expected error for short calldata") + } + }) + + t.Run("wrong selector", func(t *testing.T) { + data, err := EncodeRevokeFeedback(big.NewInt(1), 1) + if err != nil { + t.Fatal(err) + } + if _, 
err := DecodeGiveFeedbackCalldata(data); err == nil { + t.Error("expected selector mismatch error") + } else if !strings.Contains(err.Error(), "selector mismatch") { + t.Errorf("error = %v, want selector mismatch", err) + } + }) + + t.Run("truncated args", func(t *testing.T) { + data, err := EncodeGiveFeedback(big.NewInt(1), big.NewInt(50), 0, "t1", "t2", "e", "u", common.Hash{}) + if err != nil { + t.Fatal(err) + } + // Cut the entire trailing dynamic section so the feedbackURI offset + // points past the end of the payload. + if _, err := DecodeGiveFeedbackCalldata(data[:len(data)-96]); err == nil { + t.Error("expected error for truncated calldata") + } + }) +} + +func TestReputationRegistryAddress(t *testing.T) { + tests := []struct { + network string + want string + wantErr bool + }{ + {"base-sepolia", ReputationRegistryBaseSepolia, false}, + {"base", ReputationRegistryMainnet, false}, + {"base-mainnet", ReputationRegistryMainnet, false}, + {"ethereum", ReputationRegistryMainnet, false}, + {"mainnet", ReputationRegistryMainnet, false}, + {"solana", "", true}, + {"", "", true}, + } + for _, tt := range tests { + t.Run(tt.network, func(t *testing.T) { + got, err := ReputationRegistryAddress(tt.network) + if tt.wantErr { + if err == nil { + t.Errorf("expected error for %q, got address %s", tt.network, got) + } + return + } + if err != nil { + t.Fatalf("ReputationRegistryAddress(%q): %v", tt.network, err) + } + if got != tt.want { + t.Errorf("address = %s, want %s", got, tt.want) + } + }) + } +} + +func TestNewReputationReader_BadInput(t *testing.T) { + if _, err := NewReputationReader(nil, ReputationRegistryBaseSepolia); err == nil { + t.Error("nil caller: expected error") + } + if _, err := NewReputationReader(&stubCaller{}, "0xZZ"); err == nil { + t.Error("bad address: expected error") + } +} + +func TestReputationReader_Summary(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := 
parsed.Methods["getSummary"].Outputs.Pack(uint64(12), big.NewInt(925), uint8(1)) + if err != nil { + t.Fatal(err) + } + + caller := &stubCaller{ret: ret} + reader, err := NewReputationReader(caller, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + summary, err := reader.Summary(context.Background(), big.NewInt(42), nil, "code-review", "") + if err != nil { + t.Fatalf("Summary: %v", err) + } + if summary.Count != 12 { + t.Errorf("count = %d, want 12", summary.Count) + } + if summary.SummaryValue.Cmp(big.NewInt(925)) != 0 { + t.Errorf("summaryValue = %s, want 925", summary.SummaryValue) + } + if summary.SummaryValueDecimals != 1 { + t.Errorf("summaryValueDecimals = %d, want 1", summary.SummaryValueDecimals) + } + + wantData, err := parsed.Pack("getSummary", big.NewInt(42), []common.Address{}, "code-review", "") + if err != nil { + t.Fatal(err) + } + if hex.EncodeToString(caller.lastCall.Data) != hex.EncodeToString(wantData) { + t.Errorf("call data = 0x%x, want 0x%x", caller.lastCall.Data, wantData) + } + + if _, err := reader.Summary(context.Background(), nil, nil, "", ""); err == nil { + t.Error("nil agentId: expected error") + } +} + +func TestReputationReader_ReadFeedback(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["readFeedback"].Outputs.Pack(big.NewInt(-50), uint8(0), "code-review", "go", true) + if err != nil { + t.Fatal(err) + } + + reader, err := NewReputationReader(&stubCaller{ret: ret}, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + entry, err := reader.ReadFeedback(context.Background(), big.NewInt(42), common.HexToAddress("0x4444444444444444444444444444444444444444"), 3) + if err != nil { + t.Fatalf("ReadFeedback: %v", err) + } + if entry.Value.Cmp(big.NewInt(-50)) != 0 { + t.Errorf("value = %s, want -50", entry.Value) + } + if entry.ValueDecimals != 0 { + t.Errorf("valueDecimals = %d, want 0", entry.ValueDecimals) + } + if entry.Tag1 != 
"code-review" || entry.Tag2 != "go" { + t.Errorf("tags = (%q, %q)", entry.Tag1, entry.Tag2) + } + if !entry.IsRevoked { + t.Error("isRevoked = false, want true") + } +} + +func TestReputationReader_LastIndex(t *testing.T) { + parsed, err := reputationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["getLastIndex"].Outputs.Pack(uint64(9)) + if err != nil { + t.Fatal(err) + } + + reader, err := NewReputationReader(&stubCaller{ret: ret}, ReputationRegistryBaseSepolia) + if err != nil { + t.Fatal(err) + } + + idx, err := reader.LastIndex(context.Background(), big.NewInt(42), common.HexToAddress("0x4444444444444444444444444444444444444444")) + if err != nil { + t.Fatalf("LastIndex: %v", err) + } + if idx != 9 { + t.Errorf("lastIndex = %d, want 9", idx) + } +} diff --git a/internal/erc8004/revert.go b/internal/erc8004/revert.go index bf8d3198..3f7d0c68 100644 --- a/internal/erc8004/revert.go +++ b/internal/erc8004/revert.go @@ -19,8 +19,8 @@ import ( // - revert("message") → ABI-encoded Error(string) → "message" // - panic(N) → ABI-encoded Panic(uint256) → "panic: " // - revert CustomError(...) → 4-byte selector with no public ABI → -// "custom error 0x" (so an -// operator can grep the contract source) +// "custom error 0x" (so an +// operator can grep the contract source) // // The whole point: when an ERC-8004 setMetadata reverts at gas-estimation // time, the Geth/Reth node returns the revert payload as the `data` field of diff --git a/internal/erc8004/skill_tags.go b/internal/erc8004/skill_tags.go new file mode 100644 index 00000000..2ac23140 --- /dev/null +++ b/internal/erc8004/skill_tags.go @@ -0,0 +1,106 @@ +// Skill marketplace ↔ ERC-8004 tag + metadata-key convention. 
+// +// Skill ratings ride the Reputation Registry's giveFeedback tag pair +// using the ERC-8239 draft "Agent Skill Rating" convention: +// +// tag1 = "asr:skill" +// tag2 = "eip155:<chainId>:<registry>:<agentId>:<name>@<version>" +// +// This file implements the obol interim form of the ERC-8239 draft +// (ethereum/EIPs PR #1704) tag2: the registry address is lowercased hex +// for determinism (giveFeedback tags are exact-match strings on-chain, +// so a mixed-case address would silently fork the rating namespace) and +// the agentId is rendered in decimal. The skill ref is "<name>@<version>". +// +// Bundle integrity is anchored on the Identity Registry via setMetadata +// with key "skill.sha256:<name>@<version>" and the 64-char ASCII +// lowercase hex sha256 of the gzipped bundle bytes as the value — +// ASCII hex rather than raw bytes so block explorers render it legibly +// and GetMetadata comparison is a bytes.Equal on the hex string. +// +// Signing model is identical to the rest of this package: the CLI only +// builds calldata; the operator/buyer submits with their OWN wallet. +// The controller NEVER signs. + +package erc8004 + +import ( + "fmt" + "math/big" + "strings" +) + +// SkillTag1 is the fixed tag1 for skill-rating feedback entries +// (ERC-8239 draft "asr" = agent skill rating). +const SkillTag1 = "asr:skill" + +// skillHashKeyPrefix prefixes the Identity Registry setMetadata key +// that anchors a skill bundle's sha256. +const skillHashKeyPrefix = "skill.sha256:" + +// SkillRef builds the canonical "<name>@<version>" skill reference. +// Both parts must be non-empty and free of ':' (tag2 is colon- +// delimited) and '@' (the ref separator). +func SkillRef(name, version string) (string, error) { + if err := checkSkillRefPart("skill name", name); err != nil { + return "", err + } + if err := checkSkillRefPart("skill version", version); err != nil { + return "", err + } + return name + "@" + version, nil +} + +// ParseSkillRef splits a "<name>@<version>" reference and re-validates +// both parts. 
Use it to normalize operator-supplied refs before they +// reach a tag or metadata key. +func ParseSkillRef(ref string) (name, version string, err error) { + name, version, ok := strings.Cut(strings.TrimSpace(ref), "@") + if !ok { + return "", "", fmt.Errorf("erc8004: skill ref %q must be @ (e.g. buy-x402@0.1.0)", ref) + } + if _, err := SkillRef(name, version); err != nil { + return "", "", err + } + return name, version, nil +} + +func checkSkillRefPart(what, v string) error { + if strings.TrimSpace(v) == "" { + return fmt.Errorf("erc8004: %s must not be empty", what) + } + if strings.ContainsAny(v, ":@") { + return fmt.Errorf("erc8004: %s %q must not contain ':' or '@'", what, v) + } + return nil +} + +// SkillTag2 builds the ERC-8239-style tag2 binding a rating to one +// skill of one agent on one registry deployment: +// +// eip155:<chainId>:<registry>:<agentId>:<skillRef> +// +// skillRef must be a valid "<name>@<version>" reference (see SkillRef). +func SkillTag2(net NetworkConfig, agentID *big.Int, skillRef string) (string, error) { + if err := checkAgentID(agentID); err != nil { + return "", err + } + if _, _, err := ParseSkillRef(skillRef); err != nil { + return "", err + } + return fmt.Sprintf("eip155:%d:%s:%s:%s", + net.ChainID, + strings.ToLower(net.RegistryAddress), + agentID.String(), + skillRef, + ), nil +} + +// SkillHashMetadataKey returns the Identity Registry setMetadata key +// under which a skill bundle's sha256 is anchored: +// "skill.sha256:<name>@<version>". The metadata VALUE is the 64-char +// ASCII lowercase hex sha256 of the gzipped bundle bytes, stored as +// []byte(hex). 
+func SkillHashMetadataKey(skillRef string) string { + return skillHashKeyPrefix + skillRef +} diff --git a/internal/erc8004/skill_tags_test.go b/internal/erc8004/skill_tags_test.go new file mode 100644 index 00000000..eb51b43f --- /dev/null +++ b/internal/erc8004/skill_tags_test.go @@ -0,0 +1,303 @@ +package erc8004 + +import ( + "bytes" + "encoding/hex" + "math/big" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestSkillTag1_Constant(t *testing.T) { + // ERC-8239 draft "asr" tag1 — changing this forks the rating + // namespace for every previously submitted skill feedback entry. + if SkillTag1 != "asr:skill" { + t.Fatalf("SkillTag1 = %q, want %q", SkillTag1, "asr:skill") + } +} + +func TestSkillRef(t *testing.T) { + tests := []struct { + name string + skill string + version string + want string + wantErr string + }{ + {name: "ok", skill: "buy-x402", version: "0.1.0", want: "buy-x402@0.1.0"}, + {name: "ok with prerelease", skill: "monetize", version: "1.0.0-rc1", want: "monetize@1.0.0-rc1"}, + {name: "empty name", skill: "", version: "0.1.0", wantErr: "must not be empty"}, + {name: "empty version", skill: "buy-x402", version: "", wantErr: "must not be empty"}, + {name: "colon in name", skill: "buy:x402", version: "0.1.0", wantErr: "must not contain"}, + {name: "colon in version", skill: "buy-x402", version: "0:1", wantErr: "must not contain"}, + {name: "at in name", skill: "buy@x402", version: "0.1.0", wantErr: "must not contain"}, + {name: "at in version", skill: "buy-x402", version: "0@1", wantErr: "must not contain"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := SkillRef(tt.skill, tt.version) + if tt.wantErr != "" { + if err == nil || !strings.Contains(err.Error(), tt.wantErr) { + t.Fatalf("err = %v, want substring %q", err, tt.wantErr) + } + return + } + if err != nil { + t.Fatal(err) + } + if got != tt.want { + t.Errorf("SkillRef = %q, want 
%q", got, tt.want) + } + }) + } +} + +func TestParseSkillRef(t *testing.T) { + tests := []struct { + ref string + wantName string + wantVersion string + wantErr bool + }{ + {ref: "buy-x402@0.1.0", wantName: "buy-x402", wantVersion: "0.1.0"}, + {ref: " buy-x402@0.1.0 ", wantName: "buy-x402", wantVersion: "0.1.0"}, + {ref: "buy-x402", wantErr: true}, + {ref: "@0.1.0", wantErr: true}, + {ref: "buy-x402@", wantErr: true}, + {ref: "a@b@c", wantErr: true}, // version part keeps the second '@' + {ref: "", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.ref, func(t *testing.T) { + name, version, err := ParseSkillRef(tt.ref) + if tt.wantErr { + if err == nil { + t.Fatalf("ParseSkillRef(%q) = (%q, %q), want error", tt.ref, name, version) + } + return + } + if err != nil { + t.Fatal(err) + } + if name != tt.wantName || version != tt.wantVersion { + t.Errorf("ParseSkillRef(%q) = (%q, %q), want (%q, %q)", tt.ref, name, version, tt.wantName, tt.wantVersion) + } + }) + } +} + +// TestSkillTag2_Golden pins the documented obol interim form of the +// ERC-8239 draft (PR #1704) tag2: +// eip155::::@. 
+func TestSkillTag2_Golden(t *testing.T) { + tests := []struct { + name string + net NetworkConfig + agentID *big.Int + ref string + want string + }{ + { + name: "base-sepolia", + net: BaseSepolia, + agentID: big.NewInt(42), + ref: "buy-x402@0.1.0", + want: "eip155:84532:0x8004a818bfb912233c491871b3d84c89a494bd9e:42:buy-x402@0.1.0", + }, + { + name: "base mainnet", + net: Base, + agentID: big.NewInt(7), + ref: "monetize@1.2.3", + want: "eip155:8453:0x8004a169fb4a3325136eb29fa0ceb6d2e539a432:7:monetize@1.2.3", + }, + { + name: "ethereum mainnet", + net: Ethereum, + agentID: big.NewInt(1001), + ref: "quant@0.0.1", + want: "eip155:1:0x8004a169fb4a3325136eb29fa0ceb6d2e539a432:1001:quant@0.0.1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := SkillTag2(tt.net, tt.agentID, tt.ref) + if err != nil { + t.Fatal(err) + } + if got != tt.want { + t.Errorf("SkillTag2 = %q, want %q", got, tt.want) + } + // The registry segment must be lowercase: tags are + // exact-match strings on-chain. 
+ if got != strings.ToLower(got) { + t.Errorf("SkillTag2 = %q contains uppercase", got) + } + }) + } +} + +func TestSkillTag2_BadInput(t *testing.T) { + tests := []struct { + name string + agentID *big.Int + ref string + }{ + {name: "nil agent id", agentID: nil, ref: "buy-x402@0.1.0"}, + {name: "negative agent id", agentID: big.NewInt(-1), ref: "buy-x402@0.1.0"}, + {name: "ref without version", agentID: big.NewInt(1), ref: "buy-x402"}, + {name: "empty ref", agentID: big.NewInt(1), ref: ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := SkillTag2(BaseSepolia, tt.agentID, tt.ref); err == nil { + t.Fatal("expected error, got nil") + } + }) + } +} + +func TestSkillHashMetadataKey(t *testing.T) { + if got := SkillHashMetadataKey("buy-x402@0.1.0"); got != "skill.sha256:buy-x402@0.1.0" { + t.Fatalf("SkillHashMetadataKey = %q, want %q", got, "skill.sha256:buy-x402@0.1.0") + } +} + +// TestEncodeSetMetadata_Golden pins the exact calldata for fixed inputs +// and cross-checks the 4-byte selector against keccak256 of the +// canonical signature. 
+func TestEncodeSetMetadata_Golden(t *testing.T) { + const ( + wantSelector = "466648da" // keccak256("setMetadata(uint256,string,bytes)")[:4] + wantCalldata = "466648da" + + "000000000000000000000000000000000000000000000000000000000000002a" + + "0000000000000000000000000000000000000000000000000000000000000060" + + "00000000000000000000000000000000000000000000000000000000000000a0" + + "000000000000000000000000000000000000000000000000000000000000001b" + + "736b696c6c2e7368613235363a6275792d7834303240302e312e300000000000" + + "0000000000000000000000000000000000000000000000000000000000000040" + + "3966383664303831383834633764363539613266656161306335356164303135" + + "6133626634663162326230623832326364313564366331356230663030613038" + ) + + if got := hex.EncodeToString(crypto.Keccak256([]byte("setMetadata(uint256,string,bytes)"))[:4]); got != wantSelector { + t.Fatalf("keccak selector = %s, want %s", got, wantSelector) + } + + hash := "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + data, err := EncodeSetMetadata(big.NewInt(42), SkillHashMetadataKey("buy-x402@0.1.0"), []byte(hash)) + if err != nil { + t.Fatal(err) + } + if got := hex.EncodeToString(data[:4]); got != wantSelector { + t.Errorf("calldata selector = %s, want %s", got, wantSelector) + } + if got := hex.EncodeToString(data); got != wantCalldata { + t.Errorf("calldata = %s\nwant %s", got, wantCalldata) + } +} + +func TestEncodeSetMetadata_RoundTrip(t *testing.T) { + agentID := big.NewInt(123456) + key := SkillHashMetadataKey("monetize@2.0.0") + value := []byte(strings.Repeat("ab", 32)) + + data, err := EncodeSetMetadata(agentID, key, value) + if err != nil { + t.Fatal(err) + } + + decoded, err := DecodeSetMetadataCalldata(data) + if err != nil { + t.Fatal(err) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentID = %s, want %s", decoded.AgentID, agentID) + } + if decoded.Key != key { + t.Errorf("key = %q, want %q", decoded.Key, key) + } + if !bytes.Equal(decoded.Value, value) { 
+ t.Errorf("value = %x, want %x", decoded.Value, value) + } +} + +func TestEncodeSetMetadata_BadInput(t *testing.T) { + tests := []struct { + name string + agentID *big.Int + key string + }{ + {name: "nil agent id", agentID: nil, key: "skill.sha256:a@1"}, + {name: "negative agent id", agentID: big.NewInt(-5), key: "skill.sha256:a@1"}, + {name: "empty key", agentID: big.NewInt(1), key: ""}, + {name: "whitespace key", agentID: big.NewInt(1), key: " "}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := EncodeSetMetadata(tt.agentID, tt.key, []byte("x")); err == nil { + t.Fatal("expected error, got nil") + } + }) + } +} + +func TestDecodeSetMetadataCalldata_Errors(t *testing.T) { + // Wrong selector (giveFeedback's) must be rejected. + wrong, err := EncodeGiveFeedback(big.NewInt(1), big.NewInt(1), 0, "", "", "", "", common.Hash{}) + if err != nil { + t.Fatal(err) + } + if _, err := DecodeSetMetadataCalldata(wrong); err == nil { + t.Fatal("expected selector mismatch error, got nil") + } + if _, err := DecodeSetMetadataCalldata([]byte{0x01}); err == nil { + t.Fatal("expected too-short error, got nil") + } +} + +// TestEncodeGiveFeedback_SkillTags_Golden pins the full calldata of a +// skill rating: tag1="asr:skill", tag2 in the documented interim +// ERC-8239 form, score 95/100 with no fixed-point scaling. 
+func TestEncodeGiveFeedback_SkillTags_Golden(t *testing.T) { + const wantCalldata = "3c036a7e000000000000000000000000000000000000000000000000000000000000002a000000000000000000000000000000000000000000000000000000000000005f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000014000000000000000000000000000000000000000000000000000000000000001c000000000000000000000000000000000000000000000000000000000000001e0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000096173723a736b696c6c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000496569703135353a38343533323a3078383030346138313862666239313232333363343931383731623364383463383961343934626439653a34323a6275792d7834303240302e312e30000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + tag2, err := SkillTag2(BaseSepolia, big.NewInt(42), "buy-x402@0.1.0") + if err != nil { + t.Fatal(err) + } + data, err := EncodeGiveFeedback(big.NewInt(42), big.NewInt(95), 0, SkillTag1, tag2, "", "", common.Hash{}) + if err != nil { + t.Fatal(err) + } + + if got := hex.EncodeToString(data[:4]); got != "3c036a7e" { + t.Errorf("selector = %s, want 3c036a7e (giveFeedback)", got) + } + if got := hex.EncodeToString(data); got != wantCalldata { + t.Errorf("calldata mismatch:\n got %s\nwant %s", got, wantCalldata) + } + + // And it must decode back to the skill-tag pair. 
+ decoded, err := DecodeGiveFeedbackCalldata(data) + if err != nil { + t.Fatal(err) + } + if decoded.Tag1 != SkillTag1 { + t.Errorf("tag1 = %q, want %q", decoded.Tag1, SkillTag1) + } + if decoded.Tag2 != tag2 { + t.Errorf("tag2 = %q, want %q", decoded.Tag2, tag2) + } + if decoded.Value.Int64() != 95 { + t.Errorf("value = %s, want 95", decoded.Value) + } +} diff --git a/internal/erc8004/smoke.go b/internal/erc8004/smoke.go new file mode 100644 index 00000000..9f50faf8 --- /dev/null +++ b/internal/erc8004/smoke.go @@ -0,0 +1,36 @@ +// Smoke-test ↔ ERC-8004 grounding: the smoke-test request hash binds an +// operator's on-chain validationResponse to one specific (target, run) pair, +// so a published smoke report (committed to a public GitHub repo) can be +// checked against a chain-anchored verdict entry. + +package erc8004 + +import ( + "strings" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// smokeTestDomain is the versioned domain prefix for smoke-test request +// hashes. Changing it is a breaking change for every published verdict. +const smokeTestDomain = "obol/smoke-test/v1" + +// normalizeSmokeTarget canonicalizes the probed base URL exactly the way the +// in-pod smoke-test skill does (python `.strip().rstrip("/")`): surrounding +// whitespace and trailing slashes never change the request hash. +func normalizeSmokeTarget(u string) string { + return strings.TrimRight(strings.TrimSpace(u), "/") +} + +// SmokeTestRequestHash derives the ERC-8004 validation request hash for one +// smoke-test run: keccak256 of the exact ASCII bytes +// "obol/smoke-test/v1||". The CLI +// (`obol smoke calldata`, operator side) and any grounding consumer MUST +// compute this identically. The in-pod skill never computes it — there is no +// reliable keccak256 in the pod's python stdlib (hashlib.sha3_256 is NIST +// SHA-3, not keccak256) — it only echoes the normalized target into +// results.json. 
+func SmokeTestRequestHash(targetBaseURL, runID string) common.Hash { + return crypto.Keccak256Hash([]byte(smokeTestDomain + "|" + normalizeSmokeTarget(targetBaseURL) + "|" + strings.TrimSpace(runID))) +} diff --git a/internal/erc8004/smoke_test.go b/internal/erc8004/smoke_test.go new file mode 100644 index 00000000..49a63737 --- /dev/null +++ b/internal/erc8004/smoke_test.go @@ -0,0 +1,47 @@ +package erc8004 + +import "testing" + +// TestSmokeTestRequestHash_Golden pins the exact preimage layout +// ("obol/smoke-test/v1||"). The operator +// submits validationResponses against this hash via `obol smoke calldata` and +// grounding consumers match it on-chain — any drift silently breaks +// grounding, so the vector is hardcoded, not recomputed. +func TestSmokeTestRequestHash_Golden(t *testing.T) { + const ( + target = "http://obol.stack:8080" + runID = "20260101T000000Z-ab12cd" + golden = "0x2a28aa12a52a28414de4933bbe8d1e52e42828ba08006748f544596823ce7a57" + ) + + if got := SmokeTestRequestHash(target, runID).Hex(); got != golden { + t.Errorf("SmokeTestRequestHash = %s, want %s", got, golden) + } + + // The target is normalized exactly like the in-pod skill's + // `.strip().rstrip("/")`: trailing-slash and surrounding-whitespace + // variants of the same target MUST hash identically, and a padded runId + // is trimmed. + variants := []struct { + name, target, runID string + }{ + {"trailing slash", target + "/", runID}, + {"double trailing slash", target + "//", runID}, + {"surrounding whitespace", " " + target + " \n", runID}, + {"whitespace + slash", " " + target + "/ ", runID}, + {"padded runId", target, " " + runID + "\t"}, + } + for _, v := range variants { + if got := SmokeTestRequestHash(v.target, v.runID).Hex(); got != golden { + t.Errorf("%s: hash = %s, want %s (normalization must be hash-invariant)", v.name, got, golden) + } + } + + // Different target or runId must never collide with the golden pair. 
+ if SmokeTestRequestHash("http://other.example:8080", runID).Hex() == golden { + t.Error("different target produced the golden hash") + } + if SmokeTestRequestHash(target, "20260101T000000Z-ffffff").Hex() == golden { + t.Error("different runId produced the golden hash") + } +} diff --git a/internal/erc8004/validation.go b/internal/erc8004/validation.go new file mode 100644 index 00000000..8aa46a8b --- /dev/null +++ b/internal/erc8004/validation.go @@ -0,0 +1,401 @@ +package erc8004 + +// ERC-8004 Validation Registry (v2.0.0) calldata builders and read helpers. +// +// IMPORTANT — signing model: the serviceoffer/servicebounty controller NEVER +// signs validation transactions. Poster agents submit validationRequest and +// evaluator agents submit validationResponse with THEIR OWN wallets; this +// package only builds calldata for them and reads/records results on-chain. +// +// Function signatures verified against: +// - Spec: https://eips.ethereum.org/EIPS/eip-8004 (Validation Registry) +// - Reference impl + official ABI: +// https://github.com/erc-8004/erc-8004-contracts +// (abis/ValidationRegistry.json, contracts/ValidationRegistryUpgradeable.sol, +// getVersion() == "2.0.0") +// +// validationRequest(address validatorAddress, uint256 agentId, string requestURI, bytes32 requestHash) +// validationResponse(bytes32 requestHash, uint8 response, string responseURI, bytes32 responseHash, string tag) +// getValidationStatus(bytes32 requestHash) -> (address, uint256, uint8, bytes32, string, uint256) +// getSummary(uint256 agentId, address[] validatorAddresses, string tag) -> (uint64 count, uint8 avgResponse) +// getAgentValidations(uint256 agentId) -> bytes32[] +// getValidatorRequests(address validatorAddress) -> bytes32[] + +import ( + "bytes" + "context" + _ "embed" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/ethereum/go-ethereum/accounts/abi" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" +) + 
+//go:embed validation_registry.abi.json +var validationRegistryABI string + +const ( + // ValidationRegistryV2BaseSepolia is the ERC-8004 v2.0.0 Validation + // Registry on Base Sepolia (CREATE2 vanity proxy, same address on all + // supported testnets). + // + // NOTE: this intentionally differs from the legacy + // ValidationRegistryBaseSepolia constant in abi.go + // (0x8004CB39f29c09145F24Ad9dDe2A108C1A2cdfC5): that address has NO code + // on Base Sepolia — it is a v1.0.0 deployment that only exists on + // Ethereum Sepolia (verified via eth_getCode + getVersion(), 2026-06-10). + // Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts TESTNET_ADDRESSES.validationRegistry); on-chain: + // getVersion() == "2.0.0", getIdentityRegistry() == + // IdentityRegistryBaseSepolia. + ValidationRegistryV2BaseSepolia = "0x8004Cb1BF31DAf7788923b405b754f57acEB4272" + + // ValidationRegistryV2Mainnet is the ERC-8004 v2.0.0 Validation Registry + // on Ethereum mainnet and Base mainnet (deployed at the same address via + // CREATE2). Source: https://github.com/erc-8004/erc-8004-contracts + // (scripts/addresses.ts MAINNET_ADDRESSES.validationRegistry); on-chain: + // code present on both chains, getVersion() == "2.0.0", + // getIdentityRegistry() == IdentityRegistryMainnet. + ValidationRegistryV2Mainnet = "0x8004Cc8439f36fd5F9F049D9fF86523Df6dAAB58" + + // MaxValidationResponse is the maximum validationResponse score. The + // contract reverts with "resp>100" above this. + MaxValidationResponse = 100 +) + +var ( + validationABIOnce sync.Once + validationABIParsed abi.ABI + validationABIErr error +) + +// validationABI lazily parses the embedded Validation Registry ABI once. 
+func validationABI() (abi.ABI, error) { + validationABIOnce.Do(func() { + validationABIParsed, validationABIErr = abi.JSON(strings.NewReader(validationRegistryABI)) + }) + if validationABIErr != nil { + return abi.ABI{}, fmt.Errorf("erc8004: parse validation registry abi: %w", validationABIErr) + } + return validationABIParsed, nil +} + +// ValidationRegistryAddress maps a supported network name to the deployed +// ERC-8004 v2.0.0 Validation Registry address. It accepts the same aliases as +// ResolveNetwork. Networks without an on-chain-verified deployment return an +// error rather than a guessed address. +func ValidationRegistryAddress(network string) (string, error) { + net, err := ResolveNetwork(network) + if err != nil { + return "", fmt.Errorf("erc8004: validation registry: %w", err) + } + switch net.Name { + case BaseSepolia.Name: + return ValidationRegistryV2BaseSepolia, nil + case Base.Name, Ethereum.Name: + return ValidationRegistryV2Mainnet, nil + default: + return "", fmt.Errorf("erc8004: no verified validation registry deployment for network %q", net.Name) + } +} + +// checkAgentID rejects agent ids that cannot be ABI-encoded as uint256. +func checkAgentID(agentID *big.Int) error { + if agentID == nil { + return fmt.Errorf("erc8004: agentId must not be nil") + } + if agentID.Sign() < 0 { + return fmt.Errorf("erc8004: agentId must not be negative (got %s)", agentID) + } + if agentID.BitLen() > 256 { + return fmt.Errorf("erc8004: agentId does not fit in uint256") + } + return nil +} + +// unpackCalldata verifies the 4-byte selector against the named method and +// unpacks the argument payload. 
+func unpackCalldata(parsed abi.ABI, name string, data []byte) ([]interface{}, error) { + method, ok := parsed.Methods[name] + if !ok { + return nil, fmt.Errorf("erc8004: method %q not in ABI", name) + } + if len(data) < 4 { + return nil, fmt.Errorf("erc8004: calldata too short (%d bytes, need at least 4)", len(data)) + } + if !bytes.Equal(data[:4], method.ID) { + return nil, fmt.Errorf("erc8004: selector mismatch: got 0x%x, want 0x%x (%s)", data[:4], method.ID, method.Sig) + } + values, err := method.Inputs.Unpack(data[4:]) + if err != nil { + return nil, fmt.Errorf("erc8004: unpack %s calldata: %w", name, err) + } + return values, nil +} + +// EncodeValidationRequest builds calldata for +// validationRequest(address,uint256,string,bytes32). The transaction must be +// submitted by the owner or an approved operator of agentId (the poster +// agent's own wallet) — never by the controller. +func EncodeValidationRequest(validatorAddress common.Address, agentID *big.Int, requestURI string, requestHash common.Hash) ([]byte, error) { + if validatorAddress == (common.Address{}) { + return nil, fmt.Errorf("erc8004: validatorAddress must not be the zero address") + } + if err := checkAgentID(agentID); err != nil { + return nil, err + } + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationRequest", validatorAddress, agentID, requestURI, requestHash) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationRequest: %w", err) + } + return data, nil +} + +// EncodeValidationResponse builds calldata for +// validationResponse(bytes32,uint8,string,bytes32,string). response is the +// 0-100 score; the transaction must be submitted by the validator address +// named in the matching validationRequest (the evaluator's own wallet) — +// never by the controller. 
responseURI, responseHash, and tag are optional +// per spec and may be zero values. +func EncodeValidationResponse(requestHash common.Hash, response uint8, responseURI string, responseHash common.Hash, tag string) ([]byte, error) { + if requestHash == (common.Hash{}) { + return nil, fmt.Errorf("erc8004: requestHash must not be the zero hash") + } + if response > MaxValidationResponse { + return nil, fmt.Errorf("erc8004: response %d out of range [0,%d]", response, MaxValidationResponse) + } + + parsed, err := validationABI() + if err != nil { + return nil, err + } + data, err := parsed.Pack("validationResponse", requestHash, response, responseURI, responseHash, tag) + if err != nil { + return nil, fmt.Errorf("erc8004: pack validationResponse: %w", err) + } + return data, nil +} + +// ValidationRequestCall is the decoded argument set of a validationRequest call. +type ValidationRequestCall struct { + ValidatorAddress common.Address + AgentID *big.Int + RequestURI string + RequestHash common.Hash +} + +// DecodeValidationRequestCalldata decodes validationRequest calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// transactions and for tests. 
+func DecodeValidationRequestCalldata(data []byte) (ValidationRequestCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationRequestCall{}, err + } + values, err := unpackCalldata(parsed, "validationRequest", data) + if err != nil { + return ValidationRequestCall{}, err + } + if len(values) != 4 { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validationRequest arg count = %d, want 4", len(values)) + } + + out := ValidationRequestCall{} + var ok bool + if out.ValidatorAddress, ok = values[0].(common.Address); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: validatorAddress type = %T", values[0]) + } + if out.AgentID, ok = values[1].(*big.Int); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: agentId type = %T", values[1]) + } + if out.RequestURI, ok = values[2].(string); !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestURI type = %T", values[2]) + } + hash, ok := values[3].([32]byte) + if !ok { + return ValidationRequestCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[3]) + } + out.RequestHash = common.Hash(hash) + return out, nil +} + +// ValidationResponseCall is the decoded argument set of a validationResponse call. +type ValidationResponseCall struct { + RequestHash common.Hash + Response uint8 + ResponseURI string + ResponseHash common.Hash + Tag string +} + +// DecodeValidationResponseCalldata decodes validationResponse calldata +// (selector + ABI-encoded args). Useful for provenance checks on observed +// evaluator transactions and for tests. 
+func DecodeValidationResponseCalldata(data []byte) (ValidationResponseCall, error) { + parsed, err := validationABI() + if err != nil { + return ValidationResponseCall{}, err + } + values, err := unpackCalldata(parsed, "validationResponse", data) + if err != nil { + return ValidationResponseCall{}, err + } + if len(values) != 5 { + return ValidationResponseCall{}, fmt.Errorf("erc8004: validationResponse arg count = %d, want 5", len(values)) + } + + out := ValidationResponseCall{} + reqHash, ok := values[0].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: requestHash type = %T", values[0]) + } + out.RequestHash = common.Hash(reqHash) + if out.Response, ok = values[1].(uint8); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: response type = %T", values[1]) + } + if out.ResponseURI, ok = values[2].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseURI type = %T", values[2]) + } + respHash, ok := values[3].([32]byte) + if !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: responseHash type = %T", values[3]) + } + out.ResponseHash = common.Hash(respHash) + if out.Tag, ok = values[4].(string); !ok { + return ValidationResponseCall{}, fmt.Errorf("erc8004: tag type = %T", values[4]) + } + return out, nil +} + +// ValidationStatus mirrors getValidationStatus(bytes32) return values. +type ValidationStatus struct { + ValidatorAddress common.Address + AgentID *big.Int + Response uint8 + ResponseHash common.Hash + Tag string + LastUpdate *big.Int +} + +// ValidationReader provides read-only access to a Validation Registry. The +// controller uses it to observe evaluator responses; it holds no signer. +type ValidationReader struct { + contract *bind.BoundContract +} + +// NewValidationReader binds a read-only Validation Registry at +// registryAddress. caller is typically (*erc8004.Client).ETH() or any +// *ethclient.Client. 
+func NewValidationReader(caller bind.ContractCaller, registryAddress string) (*ValidationReader, error) { + if caller == nil { + return nil, fmt.Errorf("erc8004: validation reader: caller must not be nil") + } + if !common.IsHexAddress(registryAddress) { + return nil, fmt.Errorf("erc8004: validation reader: invalid registry address %q", registryAddress) + } + parsed, err := validationABI() + if err != nil { + return nil, err + } + return &ValidationReader{ + contract: bind.NewBoundContract(common.HexToAddress(registryAddress), parsed, caller, nil, nil), + }, nil +} + +// ValidationStatus reads getValidationStatus(requestHash). +func (r *ValidationReader) ValidationStatus(ctx context.Context, requestHash common.Hash) (ValidationStatus, error) { + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getValidationStatus", requestHash); err != nil { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus: %w", err) + } + if len(out) != 6 { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus returned %d values, want 6", len(out)) + } + + status := ValidationStatus{} + var ok bool + if status.ValidatorAddress, ok = out[0].(common.Address); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus validatorAddress type = %T", out[0]) + } + if status.AgentID, ok = out[1].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus agentId type = %T", out[1]) + } + if status.Response, ok = out[2].(uint8); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus response type = %T", out[2]) + } + respHash, ok := out[3].([32]byte) + if !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus responseHash type = %T", out[3]) + } + status.ResponseHash = common.Hash(respHash) + if status.Tag, ok = out[4].(string); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus tag type = %T", out[4]) + } + if 
status.LastUpdate, ok = out[5].(*big.Int); !ok { + return ValidationStatus{}, fmt.Errorf("erc8004: getValidationStatus lastUpdate type = %T", out[5]) + } + return status, nil +} + +// Summary reads getSummary(agentId, validatorAddresses, tag) and returns the +// response count and 0-100 average. +func (r *ValidationReader) Summary(ctx context.Context, agentID *big.Int, validatorAddresses []common.Address, tag string) (count uint64, avgResponse uint8, err error) { + if err := checkAgentID(agentID); err != nil { + return 0, 0, err + } + if validatorAddresses == nil { + validatorAddresses = []common.Address{} + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getSummary", agentID, validatorAddresses, tag); err != nil { + return 0, 0, fmt.Errorf("erc8004: validation getSummary: %w", err) + } + if len(out) != 2 { + return 0, 0, fmt.Errorf("erc8004: validation getSummary returned %d values, want 2", len(out)) + } + count, ok := out[0].(uint64) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary count type = %T", out[0]) + } + avgResponse, ok = out[1].(uint8) + if !ok { + return 0, 0, fmt.Errorf("erc8004: validation getSummary avgResponse type = %T", out[1]) + } + return count, avgResponse, nil +} + +// AgentValidations reads getAgentValidations(agentId) — all request hashes +// recorded for the agent. 
+func (r *ValidationReader) AgentValidations(ctx context.Context, agentID *big.Int) ([]common.Hash, error) { + if err := checkAgentID(agentID); err != nil { + return nil, err + } + var out []interface{} + if err := r.contract.Call(&bind.CallOpts{Context: ctx}, &out, "getAgentValidations", agentID); err != nil { + return nil, fmt.Errorf("erc8004: getAgentValidations: %w", err) + } + if len(out) != 1 { + return nil, fmt.Errorf("erc8004: getAgentValidations returned %d values, want 1", len(out)) + } + raw, ok := out[0].([][32]byte) + if !ok { + return nil, fmt.Errorf("erc8004: getAgentValidations type = %T", out[0]) + } + hashes := make([]common.Hash, len(raw)) + for i, h := range raw { + hashes[i] = common.Hash(h) + } + return hashes, nil +} diff --git a/internal/erc8004/validation_registry.abi.json b/internal/erc8004/validation_registry.abi.json new file mode 100644 index 00000000..a73a65bb --- /dev/null +++ b/internal/erc8004/validation_registry.abi.json @@ -0,0 +1,272 @@ +[ + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "validationRequest", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "validationResponse", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + }, + { + "inputs": [ + 
{ + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "getValidationStatus", + "outputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + }, + { + "internalType": "uint256", + "name": "lastUpdate", + "type": "uint256" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "internalType": "address[]", + "name": "validatorAddresses", + "type": "address[]" + }, + { + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "getSummary", + "outputs": [ + { + "internalType": "uint64", + "name": "count", + "type": "uint64" + }, + { + "internalType": "uint8", + "name": "avgResponse", + "type": "uint8" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + } + ], + "name": "getAgentValidations", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [ + { + "internalType": "address", + "name": "validatorAddress", + "type": "address" + } + ], + "name": "getValidatorRequests", + "outputs": [ + { + "internalType": "bytes32[]", + "name": "", + "type": "bytes32[]" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "inputs": [], + "name": "getIdentityRegistry", + "outputs": [ + { + "internalType": "address", + "name": "", + "type": "address" + } + ], + "stateMutability": "view", + "type": "function" + }, + { + "anonymous": false, + 
"inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": false, + "internalType": "string", + "name": "requestURI", + "type": "string" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + } + ], + "name": "ValidationRequest", + "type": "event" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": true, + "internalType": "address", + "name": "validatorAddress", + "type": "address" + }, + { + "indexed": true, + "internalType": "uint256", + "name": "agentId", + "type": "uint256" + }, + { + "indexed": true, + "internalType": "bytes32", + "name": "requestHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "uint8", + "name": "response", + "type": "uint8" + }, + { + "indexed": false, + "internalType": "string", + "name": "responseURI", + "type": "string" + }, + { + "indexed": false, + "internalType": "bytes32", + "name": "responseHash", + "type": "bytes32" + }, + { + "indexed": false, + "internalType": "string", + "name": "tag", + "type": "string" + } + ], + "name": "ValidationResponse", + "type": "event" + } +] diff --git a/internal/erc8004/validation_test.go b/internal/erc8004/validation_test.go new file mode 100644 index 00000000..939bbf5a --- /dev/null +++ b/internal/erc8004/validation_test.go @@ -0,0 +1,404 @@ +package erc8004 + +import ( + "context" + "encoding/hex" + "math/big" + "strings" + "testing" + + ethereum "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// stubCaller is a bind.ContractCaller that returns canned ABI-encoded output. +// Shared by validation and reputation reader tests. Never hits the network. 
+type stubCaller struct { + ret []byte + err error + lastCall ethereum.CallMsg +} + +func (s *stubCaller) CodeAt(_ context.Context, _ common.Address, _ *big.Int) ([]byte, error) { + return []byte{0x01}, nil +} + +func (s *stubCaller) CallContract(_ context.Context, call ethereum.CallMsg, _ *big.Int) ([]byte, error) { + s.lastCall = call + return s.ret, s.err +} + +func TestValidationABI_Parses(t *testing.T) { + if _, err := validationABI(); err != nil { + t.Fatalf("embedded validation ABI failed to parse: %v", err) + } +} + +// TestValidationABI_SelectorGoldenValues pins the 4-byte selectors of the +// verified v2.0.0 signatures (spec: https://eips.ethereum.org/EIPS/eip-8004; +// ABI: https://github.com/erc-8004/erc-8004-contracts). Each golden value is +// cross-checked against keccak256 of the canonical signature string and the +// parsed ABI method. +func TestValidationABI_SelectorGoldenValues(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + tests := []struct { + method string + sig string + selector string + }{ + {"validationRequest", "validationRequest(address,uint256,string,bytes32)", "aaf400c4"}, + {"validationResponse", "validationResponse(bytes32,uint8,string,bytes32,string)", "3d659a96"}, + {"getValidationStatus", "getValidationStatus(bytes32)", "ff2febfc"}, + {"getSummary", "getSummary(uint256,address[],string)", "1b7cabd6"}, + {"getAgentValidations", "getAgentValidations(uint256)", "8d5d0c2d"}, + {"getValidatorRequests", "getValidatorRequests(address)", "4bf3158c"}, + {"getIdentityRegistry", "getIdentityRegistry()", "bc4d861b"}, + } + + for _, tt := range tests { + t.Run(tt.method, func(t *testing.T) { + m, ok := parsed.Methods[tt.method] + if !ok { + t.Fatalf("method %q missing from parsed ABI", tt.method) + } + if m.Sig != tt.sig { + t.Errorf("signature = %q, want %q", m.Sig, tt.sig) + } + if got := hex.EncodeToString(m.ID); got != tt.selector { + t.Errorf("parsed selector = 0x%s, want 0x%s", got, tt.selector) 
+ } + if got := hex.EncodeToString(crypto.Keccak256([]byte(tt.sig))[:4]); got != tt.selector { + t.Errorf("keccak256(%q)[:4] = 0x%s, want 0x%s", tt.sig, got, tt.selector) + } + }) + } +} + +func TestValidationABI_EventsPresent(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + for _, name := range []string{"ValidationRequest", "ValidationResponse"} { + if _, ok := parsed.Events[name]; !ok { + t.Errorf("missing event %q in parsed ABI", name) + } + } +} + +func TestEncodeValidationRequest_RoundTrip(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + agentID := big.NewInt(42) + requestURI := "https://example.org/bounty/42/request.json" + requestHash := crypto.Keccak256Hash([]byte("request payload")) + + data, err := EncodeValidationRequest(validator, agentID, requestURI, requestHash) + if err != nil { + t.Fatalf("EncodeValidationRequest: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "aaf400c4" { + t.Errorf("selector = 0x%s, want 0xaaf400c4", got) + } + + decoded, err := DecodeValidationRequestCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationRequestCalldata: %v", err) + } + if decoded.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", decoded.ValidatorAddress, validator) + } + if decoded.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", decoded.AgentID, agentID) + } + if decoded.RequestURI != requestURI { + t.Errorf("requestURI = %q, want %q", decoded.RequestURI, requestURI) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } +} + +func TestEncodeValidationResponse_RoundTrip(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("request payload")) + responseHash := crypto.Keccak256Hash([]byte("evaluation artifact")) + + data, err := EncodeValidationResponse(requestHash, 87, "ipfs://bafy.../eval.json", responseHash, "code-review") + if err != 
nil { + t.Fatalf("EncodeValidationResponse: %v", err) + } + if got := hex.EncodeToString(data[:4]); got != "3d659a96" { + t.Errorf("selector = 0x%s, want 0x3d659a96", got) + } + + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("DecodeValidationResponseCalldata: %v", err) + } + if decoded.RequestHash != requestHash { + t.Errorf("requestHash = %s, want %s", decoded.RequestHash, requestHash) + } + if decoded.Response != 87 { + t.Errorf("response = %d, want 87", decoded.Response) + } + if decoded.ResponseURI != "ipfs://bafy.../eval.json" { + t.Errorf("responseURI = %q", decoded.ResponseURI) + } + if decoded.ResponseHash != responseHash { + t.Errorf("responseHash = %s, want %s", decoded.ResponseHash, responseHash) + } + if decoded.Tag != "code-review" { + t.Errorf("tag = %q, want %q", decoded.Tag, "code-review") + } +} + +func TestEncodeValidationResponse_OptionalFieldsZero(t *testing.T) { + requestHash := crypto.Keccak256Hash([]byte("req")) + data, err := EncodeValidationResponse(requestHash, 0, "", common.Hash{}, "") + if err != nil { + t.Fatalf("EncodeValidationResponse with zero optionals: %v", err) + } + decoded, err := DecodeValidationResponseCalldata(data) + if err != nil { + t.Fatalf("decode: %v", err) + } + if decoded.Response != 0 || decoded.ResponseURI != "" || decoded.Tag != "" || decoded.ResponseHash != (common.Hash{}) { + t.Errorf("zero optionals did not round-trip: %+v", decoded) + } +} + +func TestEncodeValidationRequest_BadInput(t *testing.T) { + validator := common.HexToAddress("0x1111111111111111111111111111111111111111") + hash := crypto.Keccak256Hash([]byte("x")) + + tests := []struct { + name string + fn func() ([]byte, error) + }{ + {"zero validator", func() ([]byte, error) { + return EncodeValidationRequest(common.Address{}, big.NewInt(1), "u", hash) + }}, + {"nil agentId", func() ([]byte, error) { + return EncodeValidationRequest(validator, nil, "u", hash) + }}, + {"negative agentId", func() ([]byte, error) { 
+ return EncodeValidationRequest(validator, big.NewInt(-1), "u", hash) + }}, + {"zero requestHash", func() ([]byte, error) { + return EncodeValidationRequest(validator, big.NewInt(1), "u", common.Hash{}) + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := tt.fn(); err == nil { + t.Error("expected error, got nil") + } + }) + } +} + +func TestEncodeValidationResponse_BadInput(t *testing.T) { + hash := crypto.Keccak256Hash([]byte("x")) + + if _, err := EncodeValidationResponse(common.Hash{}, 50, "", common.Hash{}, ""); err == nil { + t.Error("zero requestHash: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, 101, "", common.Hash{}, ""); err == nil { + t.Error("response 101: expected error, got nil") + } + if _, err := EncodeValidationResponse(hash, MaxValidationResponse, "", common.Hash{}, ""); err != nil { + t.Errorf("response 100 should be accepted: %v", err) + } +} + +func TestDecodeValidationCalldata_Errors(t *testing.T) { + t.Run("too short", func(t *testing.T) { + if _, err := DecodeValidationResponseCalldata([]byte{0x3d, 0x65}); err == nil { + t.Error("expected error for short calldata") + } + }) + + t.Run("wrong selector", func(t *testing.T) { + // validationRequest calldata fed to the validationResponse decoder. 
+ data, err := EncodeValidationRequest( + common.HexToAddress("0x2222222222222222222222222222222222222222"), + big.NewInt(7), "u", crypto.Keccak256Hash([]byte("y"))) + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data); err == nil { + t.Error("expected selector mismatch error") + } else if !strings.Contains(err.Error(), "selector mismatch") { + t.Errorf("error = %v, want selector mismatch", err) + } + }) + + t.Run("truncated args", func(t *testing.T) { + data, err := EncodeValidationResponse(crypto.Keccak256Hash([]byte("z")), 10, "uri", common.Hash{}, "tag") + if err != nil { + t.Fatal(err) + } + if _, err := DecodeValidationResponseCalldata(data[:len(data)-40]); err == nil { + t.Error("expected error for truncated calldata") + } + }) +} + +func TestValidationRegistryAddress(t *testing.T) { + tests := []struct { + network string + want string + wantErr bool + }{ + {"base-sepolia", ValidationRegistryV2BaseSepolia, false}, + {" Base-Sepolia ", ValidationRegistryV2BaseSepolia, false}, + {"base", ValidationRegistryV2Mainnet, false}, + {"base-mainnet", ValidationRegistryV2Mainnet, false}, + {"ethereum", ValidationRegistryV2Mainnet, false}, + {"mainnet", ValidationRegistryV2Mainnet, false}, + {"solana", "", true}, + {"", "", true}, + } + for _, tt := range tests { + t.Run(tt.network, func(t *testing.T) { + got, err := ValidationRegistryAddress(tt.network) + if tt.wantErr { + if err == nil { + t.Errorf("expected error for %q, got address %s", tt.network, got) + } + return + } + if err != nil { + t.Fatalf("ValidationRegistryAddress(%q): %v", tt.network, err) + } + if got != tt.want { + t.Errorf("address = %s, want %s", got, tt.want) + } + }) + } +} + +func TestNewValidationReader_BadInput(t *testing.T) { + if _, err := NewValidationReader(nil, ValidationRegistryV2BaseSepolia); err == nil { + t.Error("nil caller: expected error") + } + if _, err := NewValidationReader(&stubCaller{}, "not-an-address"); err == nil { + t.Error("bad 
address: expected error") + } +} + +func TestValidationReader_ValidationStatus(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + + validator := common.HexToAddress("0x3333333333333333333333333333333333333333") + agentID := big.NewInt(42) + respHash := crypto.Keccak256Hash([]byte("artifact")) + lastUpdate := big.NewInt(1765432100) + + ret, err := parsed.Methods["getValidationStatus"].Outputs.Pack( + validator, agentID, uint8(91), [32]byte(respHash), "code-review", lastUpdate) + if err != nil { + t.Fatalf("pack outputs: %v", err) + } + + caller := &stubCaller{ret: ret} + reader, err := NewValidationReader(caller, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + reqHash := crypto.Keccak256Hash([]byte("request")) + status, err := reader.ValidationStatus(context.Background(), reqHash) + if err != nil { + t.Fatalf("ValidationStatus: %v", err) + } + + if status.ValidatorAddress != validator { + t.Errorf("validatorAddress = %s, want %s", status.ValidatorAddress, validator) + } + if status.AgentID.Cmp(agentID) != 0 { + t.Errorf("agentId = %s, want %s", status.AgentID, agentID) + } + if status.Response != 91 { + t.Errorf("response = %d, want 91", status.Response) + } + if status.ResponseHash != respHash { + t.Errorf("responseHash = %s, want %s", status.ResponseHash, respHash) + } + if status.Tag != "code-review" { + t.Errorf("tag = %q, want %q", status.Tag, "code-review") + } + if status.LastUpdate.Cmp(lastUpdate) != 0 { + t.Errorf("lastUpdate = %s, want %s", status.LastUpdate, lastUpdate) + } + + // The reader must have issued a getValidationStatus(requestHash) call. 
+ wantData, err := parsed.Pack("getValidationStatus", reqHash) + if err != nil { + t.Fatal(err) + } + if hex.EncodeToString(caller.lastCall.Data) != hex.EncodeToString(wantData) { + t.Errorf("call data = 0x%x, want 0x%x", caller.lastCall.Data, wantData) + } +} + +func TestValidationReader_Summary(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + ret, err := parsed.Methods["getSummary"].Outputs.Pack(uint64(5), uint8(78)) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + count, avg, err := reader.Summary(context.Background(), big.NewInt(42), nil, "") + if err != nil { + t.Fatalf("Summary: %v", err) + } + if count != 5 || avg != 78 { + t.Errorf("summary = (%d, %d), want (5, 78)", count, avg) + } + + if _, _, err := reader.Summary(context.Background(), nil, nil, ""); err == nil { + t.Error("nil agentId: expected error") + } +} + +func TestValidationReader_AgentValidations(t *testing.T) { + parsed, err := validationABI() + if err != nil { + t.Fatal(err) + } + h1 := crypto.Keccak256Hash([]byte("a")) + h2 := crypto.Keccak256Hash([]byte("b")) + ret, err := parsed.Methods["getAgentValidations"].Outputs.Pack([][32]byte{h1, h2}) + if err != nil { + t.Fatal(err) + } + + reader, err := NewValidationReader(&stubCaller{ret: ret}, ValidationRegistryV2BaseSepolia) + if err != nil { + t.Fatal(err) + } + + hashes, err := reader.AgentValidations(context.Background(), big.NewInt(42)) + if err != nil { + t.Fatalf("AgentValidations: %v", err) + } + if len(hashes) != 2 || hashes[0] != h1 || hashes[1] != h2 { + t.Errorf("hashes = %v, want [%s %s]", hashes, h1, h2) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 4614c4c9..e463a74d 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -37,19 +37,171 @@ const ( ProviderOpenAI = "openai" ) -// Known provider definitions — no need to query the 
running pod. -var knownProviders = []ProviderInfo{ - {ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY", AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}}, - {ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY"}, - {ID: ProviderOllama, Name: "Ollama (local)", EnvVar: ""}, -} +// apiMode selects how a provider's LiteLLM model_list entries are shaped. +type apiMode string + +const ( + // modeAnthropic: native LiteLLM anthropic routing + prompt-cache markers + // + an anthropic/* wildcard. Key read from EnvVar. + modeAnthropic apiMode = "anthropic" + // modeOpenAI: native LiteLLM openai/ routing + an openai/* wildcard. + modeOpenAI apiMode = "openai" + // modeOllama: local ollama_chat/ entries pointed at the in-cluster Ollama. + modeOllama apiMode = "ollama" + // modeOpenAICompatible: any OpenAI-compatible BYOK aggregator (OpenRouter, + // Venice, NVIDIA, …). Explicit entries only, Model="openai/" with an + // explicit api_base = BaseURL and key read from EnvVar. No wildcard: + // aggregator namespaces are huge and overlapping, so we register only the + // models the operator asked for. + modeOpenAICompatible apiMode = "openai-compatible" +) -// ProviderInfo describes an LLM provider. +// ProviderInfo describes an LLM provider. knownProviders is the single +// source of truth: adding a provider is one row here, and every layer (the +// setup CLI, default-model selection, LiteLLM entry shaping, status, and +// the persisted record) reads from this struct instead of a per-provider +// switch. type ProviderInfo struct { - ID string // provider id (e.g. "anthropic", "openai", "ollama") + ID string // provider id (e.g. "anthropic", "openai", "venice") Name string // display name EnvVar string // primary env var for API key (empty for Ollama) AltEnvVars []string // fallback env vars checked in order (e.g. 
CLAUDE_CODE_OAUTH_TOKEN) + Mode apiMode // how model_list entries are shaped + BaseURL string // OpenAI-compatible base_url (modeOpenAICompatible only) + Default string // default chat model when --model is omitted ("" = ask/require) + SignupURL string // where to obtain an API key (shown as a hint) + Free []string // curated zero-marginal-cost model ids (seeded by --free) +} + +// IsBYOK reports whether the provider is a BYOK OpenAI-compatible +// aggregator reached over the public internet (as opposed to a native +// provider or the local Ollama). +func (p ProviderInfo) IsBYOK() bool { return p.Mode == modeOpenAICompatible } + +// knownProviders is the registry of supported LLM providers. The first +// three are native/local; the rest are BYOK OpenAI-compatible aggregators — +// each is pure data, no bespoke wiring. base_url values are intentionally +// without a trailing /v1 where LiteLLM appends it; aggregator paths that +// already include /v1 keep it (LiteLLM only auto-appends for bare hosts). +var knownProviders = []ProviderInfo{ + { + ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY", + AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}, Mode: modeAnthropic, + Default: "claude-sonnet-4-6", SignupURL: "https://console.anthropic.com/settings/keys", + }, + { + ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY", Mode: modeOpenAI, + Default: "gpt-5.5", SignupURL: "https://platform.openai.com/api-keys", + }, + { + ID: ProviderOllama, Name: "Ollama (local)", EnvVar: "", Mode: modeOllama, + }, + // ── BYOK OpenAI-compatible aggregators (the easy getting-started path) ── + // model_list entries are pure data: Model="openai/", api_base=BaseURL, + // key from EnvVar. Default models that can't be statically pinned (the + // aggregator's catalog rotates) are left blank — setup then resolves a + // model from the live /v1/models list or --model. 
+ { + ID: "venice", Name: "Venice", EnvVar: "VENICE_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.venice.ai/api/v1", SignupURL: "https://venice.ai/settings/api", + }, + { + ID: "openrouter", Name: "OpenRouter", EnvVar: "OPENROUTER_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://openrouter.ai/api/v1", Default: "openrouter/auto", + SignupURL: "https://openrouter.ai/keys", + // Curated zero-cost models (snapshot — OpenRouter's free roster + // rotates; pass --model for any other). Seeded by `--free`. + Free: []string{ + "openrouter/elephant-alpha", + "openrouter/owl-alpha", + "poolside/laguna-m.1:free", + "tencent/hy3-preview:free", + "nvidia/nemotron-3-super-120b-a12b:free", + "nvidia/nemotron-3-ultra-550b-a55b:free", + "inclusionai/ring-2.6-1t:free", + }, + }, + { + ID: "nvidia", Name: "NVIDIA NIM", EnvVar: "NVIDIA_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://integrate.api.nvidia.com/v1", SignupURL: "https://build.nvidia.com", + }, + { + ID: "gmi", Name: "GMI Cloud", EnvVar: "GMI_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.gmi-serving.com/v1", SignupURL: "https://console.gmicloud.ai", + }, + { + ID: "novita", Name: "Novita", EnvVar: "NOVITA_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.novita.ai/openai/v1", SignupURL: "https://novita.ai/settings/key-management", + }, + { + ID: "huggingface", Name: "Hugging Face Router", EnvVar: "HF_TOKEN", Mode: modeOpenAICompatible, + BaseURL: "https://router.huggingface.co/v1", SignupURL: "https://huggingface.co/settings/tokens", + }, +} + +// ProviderByID returns the registry entry for id and whether it was found. +func ProviderByID(id string) (ProviderInfo, bool) { + for _, p := range knownProviders { + if p.ID == id { + return p, true + } + } + return ProviderInfo{}, false +} + +// FetchOpenAICompatibleModels lists model ids from a provider's +// OpenAI-compatible GET /models endpoint. 
Used at setup time to +// resolve a real model id when an aggregator has no statically-pinnable +// default (its catalog rotates). Best-effort: a non-200, a network error, +// or an unparseable body returns an error the caller falls back from +// (prompt for / require --model). The just-entered apiKey authenticates +// the call from the host. +func FetchOpenAICompatibleModels(baseURL, apiKey string) ([]string, error) { + endpoint := strings.TrimRight(baseURL, "/") + "/models" + ctx, cancel := context.WithTimeout(context.Background(), 12*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, err + } + if apiKey != "" { + req.Header.Set("Authorization", "Bearer "+apiKey) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("models endpoint returned %d", resp.StatusCode) + } + + var parsed struct { + Data []struct { + ID string `json:"id"` + } `json:"data"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + return nil, fmt.Errorf("parse models response: %w", err) + } + + ids := make([]string, 0, len(parsed.Data)) + for _, m := range parsed.Data { + if m.ID != "" { + ids = append(ids, m.ID) + } + } + if len(ids) == 0 { + return nil, errors.New("models endpoint returned no models") + } + return ids, nil } // ProviderStatus captures effective global LiteLLM provider state. @@ -1308,16 +1460,42 @@ var WellKnownModels = map[string][]string{ }, } -// buildModelEntries creates LiteLLM model_list entries for a provider. -// Cloud providers (anthropic, openai) get a wildcard entry plus explicit -// entries for the requested models. Ollama gets explicit entries only -// (wildcards are broken for ollama_chat/). 
+// buildModelEntries creates LiteLLM model_list entries for a provider, +// shaped by its registry Mode: +// - anthropic/openai: explicit entries (so the chosen model wins Rank's +// "first chat-capable" rule) followed by a PROVIDER/* wildcard. +// - ollama: explicit ollama_chat/ entries only (wildcards are broken). +// - openai-compatible: explicit openai/MODEL entries with an explicit +// api_base = BaseURL and key from EnvVar — no wildcard. +// +// A provider not in the registry falls back to the generic PROVIDER/MODEL +// shape keyed on the upper-cased PROVIDER_API_KEY (legacy `setup custom` behavior). func buildModelEntries(provider string, models []string) []ModelEntry { - var entries []ModelEntry + p, ok := ProviderByID(provider) + if !ok { + // Unknown provider: legacy generic shape (no api_base). + var entries []ModelEntry + for _, m := range models { + entries = append(entries, ModelEntry{ + ModelName: m, + LiteLLMParams: LiteLLMParams{ + Model: provider + "/" + m, + APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)), + }, + }) + } + return entries + } + + keyRef := "" + if p.EnvVar != "" { + keyRef = "os.environ/" + p.EnvVar + } - switch provider { - case ProviderOllama: - // Explicit entries — ollama_chat/* wildcards are broken in LiteLLM + var entries []ModelEntry + switch p.Mode { + case modeOllama: + // Explicit entries — ollama_chat/* wildcards are broken in LiteLLM. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, @@ -1327,7 +1505,7 @@ func buildModelEntries(provider string, models []string) []ModelEntry { }, }) } - case ProviderAnthropic: + case modeAnthropic: cachePoints := anthropicCacheControlPoints() // Explicit entries first so the user-selected model is the primary // under model.Rank's "first chat-capable wins" rule.
Hermes cannot @@ -1338,39 +1516,41 @@ func buildModelEntries(provider string, models []string) []ModelEntry { ModelName: m, LiteLLMParams: LiteLLMParams{ Model: m, - APIKey: "os.environ/ANTHROPIC_API_KEY", + APIKey: keyRef, CacheControlInjectionPoints: cachePoints, }, }) } - // Wildcard: routes any anthropic model without explicit registration. entries = append(entries, ModelEntry{ ModelName: "anthropic/*", LiteLLMParams: LiteLLMParams{ Model: "anthropic/*", - APIKey: "os.environ/ANTHROPIC_API_KEY", + APIKey: keyRef, CacheControlInjectionPoints: cachePoints, }, }) - case ProviderOpenAI: + case modeOpenAI: // Explicit-before-wildcard, same rationale as Anthropic above. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, - LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"}, + LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: keyRef}, }) } entries = append(entries, ModelEntry{ ModelName: "openai/*", - LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"}, + LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: keyRef}, }) - default: + case modeOpenAICompatible: + // Explicit openai-shaped entries with an explicit api_base. No + // wildcard — the aggregator's catalog is huge and overlaps others. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, LiteLLMParams: LiteLLMParams{ - Model: provider + "/" + m, - APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)), + Model: "openai/" + m, + APIBase: p.BaseURL, + APIKey: keyRef, }, }) } @@ -1464,6 +1644,18 @@ func detectProvider(entry ModelEntry) string { } model := entry.LiteLLMParams.Model + // BYOK aggregator entries are openai-shaped (openai/) but carry an + // explicit api_base — match it back to the registry so status groups + // them under their real provider (venice, openrouter, …) rather than + // "openai". Checked before the bare openai/ prefix below. 
+ if base := entry.LiteLLMParams.APIBase; base != "" && strings.HasPrefix(model, ProviderOpenAI+"/") { + for _, p := range knownProviders { + if p.Mode == modeOpenAICompatible && p.BaseURL == base { + return p.ID + } + } + } + // Wildcard entries if strings.HasPrefix(model, ProviderAnthropic+"/") { return ProviderAnthropic diff --git a/internal/model/model_test.go b/internal/model/model_test.go index 210ef676..59ec99fa 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -1166,3 +1166,82 @@ func modelNames(entries []ModelEntry) []string { } return out } + +func TestBuildModelEntries_OpenAICompatible(t *testing.T) { + entries := buildModelEntries("venice", []string{"venice-uncensored"}) + if len(entries) != 1 { + t.Fatalf("got %d entries, want 1 (aggregators get no wildcard)", len(entries)) + } + e := entries[0] + if e.ModelName != "venice-uncensored" { + t.Errorf("model_name = %q", e.ModelName) + } + if e.LiteLLMParams.Model != "openai/venice-uncensored" { + t.Errorf("model = %q, want openai/venice-uncensored", e.LiteLLMParams.Model) + } + if e.LiteLLMParams.APIBase != "https://api.venice.ai/api/v1" { + t.Errorf("api_base = %q, want venice base_url", e.LiteLLMParams.APIBase) + } + if e.LiteLLMParams.APIKey != "os.environ/VENICE_API_KEY" { + t.Errorf("api_key = %q, want os.environ/VENICE_API_KEY", e.LiteLLMParams.APIKey) + } +} + +func TestBuildModelEntries_UnknownProviderLegacyShape(t *testing.T) { + // Providers not in the registry keep the legacy generic shape (no api_base). 
+ entries := buildModelEntries("somevendor", []string{"m1"}) + if len(entries) != 1 || entries[0].LiteLLMParams.Model != "somevendor/m1" { + t.Fatalf("unexpected legacy entries: %+v", entries) + } + if entries[0].LiteLLMParams.APIBase != "" { + t.Errorf("legacy shape must not set api_base, got %q", entries[0].LiteLLMParams.APIBase) + } +} + +func TestProviderByID(t *testing.T) { + p, ok := ProviderByID("openrouter") + if !ok { + t.Fatal("openrouter must be in the registry") + } + if p.BaseURL == "" || p.EnvVar != "OPENROUTER_API_KEY" || len(p.Free) == 0 { + t.Errorf("openrouter row incomplete: %+v", p) + } + if _, ok := ProviderByID("nope"); ok { + t.Error("unknown provider must not be found") + } +} + +func TestDetectProvider_AggregatorByAPIBase(t *testing.T) { + venice := ModelEntry{ModelName: "x", LiteLLMParams: LiteLLMParams{ + Model: "openai/x", APIBase: "https://api.venice.ai/api/v1", + }} + if got := detectProvider(venice); got != "venice" { + t.Errorf("venice entry detected as %q, want venice", got) + } + // A native OpenAI entry (no api_base) must still read as openai. 
+ oai := ModelEntry{ModelName: "gpt-5.5", LiteLLMParams: LiteLLMParams{Model: "openai/gpt-5.5"}} + if got := detectProvider(oai); got != ProviderOpenAI { + t.Errorf("openai entry detected as %q, want openai", got) + } +} + +func TestProviderRegistry_Invariants(t *testing.T) { + seen := map[string]bool{} + for _, p := range knownProviders { + if seen[p.ID] { + t.Errorf("duplicate provider id %q", p.ID) + } + seen[p.ID] = true + if p.Mode == modeOpenAICompatible && (p.BaseURL == "" || p.EnvVar == "") { + t.Errorf("BYOK provider %q must set BaseURL and EnvVar", p.ID) + } + if len(p.Free) > 0 && p.Mode != modeOpenAICompatible { + t.Errorf("provider %q has Free models but is not openai-compatible", p.ID) + } + } + for _, id := range []string{ProviderAnthropic, ProviderOpenAI, ProviderOllama} { + if _, ok := ProviderByID(id); !ok { + t.Errorf("native provider %q missing from registry", id) + } + } +} diff --git a/internal/monetizeapi/evaluatorenrollment.go b/internal/monetizeapi/evaluatorenrollment.go new file mode 100644 index 00000000..16d60a8c --- /dev/null +++ b/internal/monetizeapi/evaluatorenrollment.go @@ -0,0 +1,204 @@ +package monetizeapi + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// ── EvaluatorEnrollment ───────────────────────────────────────────────────── +// +// EvaluatorEnrollment is an evaluator's opt-in to the OBOL eval market: an +// address + the task types it can re-run + an optional device attestation. +// The spec is evaluator-written; the LADDER STATE in status is controller- +// owned (Shadow → Probation → Full, per task type — design doc §11.4). No +// staking: the only collateral is the future income a reputation earns. + +// Evaluator ladder tiers. 
+const ( + EvaluatorTierShadow = "Shadow" + EvaluatorTierProbation = "Probation" + EvaluatorTierFull = "Full" +) + +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Namespaced,shortName=ee +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Address",type=string,JSONPath=`.spec.address` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// EvaluatorEnrollment opts an evaluator into the eval market. +type EvaluatorEnrollment struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec EvaluatorEnrollmentSpec `json:"spec,omitempty"` + Status EvaluatorEnrollmentStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// EvaluatorEnrollmentList is the list form. +type EvaluatorEnrollmentList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []EvaluatorEnrollment `json:"items"` +} + +type EvaluatorEnrollmentSpec struct { + // Address is the evaluator's payout/identity address — the same address + // used in eval-commit/eval-reveal annotations and bound into commitments. + // +kubebuilder:validation:Required + // +kubebuilder:validation:Pattern=`^0x[a-fA-F0-9]{40}$` + Address string `json:"address"` + + // TaskTypes this evaluator can re-run (versioned refs, e.g. benchmark@v1). + // +kubebuilder:validation:Required + TaskTypes []string `json:"taskTypes"` + + // Attestation is the device-binding claim. v1 RECORDS it (sybil cost is + // real hardware per identity once verification lands with the Secure + // Enclave wiring); scheme "none" is honest-unattested. + Attestation EvaluatorAttestation `json:"attestation,omitempty"` +} + +type EvaluatorAttestation struct { + // Scheme: none (unattested) | secure-enclave (device-bound P-256 key). 
+ // +kubebuilder:validation:Enum=none;secure-enclave + Scheme string `json:"scheme,omitempty"` + + // PublicKey is the attestation public key (secure-enclave scheme). + PublicKey string `json:"publicKey,omitempty"` + + // Signature is the enrollment signature over the address (scheme-defined). + Signature string `json:"signature,omitempty"` +} + +// EvaluatorEnrollmentStatus is controller-owned ladder state. +type EvaluatorEnrollmentStatus struct { + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // Records hold per-task-type ladder progress (reputation is per task + // type — benchmark@v1 rep says nothing about finetune@v1). + Records []EvaluatorLadderRecord `json:"records,omitempty"` +} + +// EvaluatorLadderRecord is one task type's ladder progress. +type EvaluatorLadderRecord struct { + TaskType string `json:"taskType,omitempty"` + + // Tier: Shadow | Probation | Full. New enrollments start Shadow. + Tier string `json:"tier,omitempty"` + + // ShadowAgreements counts shadow verdicts within tolerance of the quorum + // median (promotion to Probation at the task package's threshold). + ShadowAgreements int64 `json:"shadowAgreements,omitempty"` + + // ProbationEvals counts paid in-band evals while on Probation (promotion + // to Full at the package threshold). + ProbationEvals int64 `json:"probationEvals,omitempty"` + + // Completed counts all settled panel seats (any tier). + Completed int64 `json:"completed,omitempty"` + + // Divergences counts settled seats graded out of band (incl. non/bad + // reveals) — the negative reputation signal. + Divergences int64 `json:"divergences,omitempty"` + + // RecentFulfillers are the last few fulfiller addresses this evaluator + // judged — the pair-diversity rule down-weights repeat pairings. + RecentFulfillers []string `json:"recentFulfillers,omitempty"` + + // LastEvalAt is when this evaluator's most recent seat settled — the + // anchor for reputation decay (decayHalfLife). 
+ LastEvalAt *metav1.Time `json:"lastEvalAt,omitempty"` + + // GroundedEvals counts settled seats whose verdict was grounded by an + // on-chain ERC-8004 validation entry. + GroundedEvals int `json:"groundedEvals,omitempty"` +} + +// ── deepcopy (hand-written, matching the package idiom) ───────────────────── + +func (in *EvaluatorEnrollment) DeepCopyInto(out *EvaluatorEnrollment) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +func (in *EvaluatorEnrollment) DeepCopy() *EvaluatorEnrollment { + if in == nil { + return nil + } + out := new(EvaluatorEnrollment) + in.DeepCopyInto(out) + return out +} + +func (in *EvaluatorEnrollment) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *EvaluatorEnrollmentList) DeepCopyInto(out *EvaluatorEnrollmentList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + l, m := &in.Items, &out.Items + *m = make([]EvaluatorEnrollment, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } +} + +func (in *EvaluatorEnrollmentList) DeepCopy() *EvaluatorEnrollmentList { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentList) + in.DeepCopyInto(out) + return out +} + +func (in *EvaluatorEnrollmentList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *EvaluatorEnrollmentSpec) DeepCopyInto(out *EvaluatorEnrollmentSpec) { + *out = *in + if in.TaskTypes != nil { + out.TaskTypes = make([]string, len(in.TaskTypes)) + copy(out.TaskTypes, in.TaskTypes) + } + out.Attestation = in.Attestation +} + +func (in *EvaluatorEnrollmentStatus) DeepCopyInto(out *EvaluatorEnrollmentStatus) { + *out = *in + if in.Records != nil { + l, m := &in.Records, &out.Records + *m = make([]EvaluatorLadderRecord, len(*l)) + for i := range *l { 
+ (*l)[i].DeepCopyInto(&(*m)[i]) + } + } +} + +func (in *EvaluatorLadderRecord) DeepCopyInto(out *EvaluatorLadderRecord) { + *out = *in + if in.RecentFulfillers != nil { + out.RecentFulfillers = make([]string, len(in.RecentFulfillers)) + copy(out.RecentFulfillers, in.RecentFulfillers) + } + if in.LastEvalAt != nil { + l, m := &in.LastEvalAt, &out.LastEvalAt + *m = (*l).DeepCopy() + } +} diff --git a/internal/monetizeapi/servicebounty.go b/internal/monetizeapi/servicebounty.go new file mode 100644 index 00000000..bdcbfbfd --- /dev/null +++ b/internal/monetizeapi/servicebounty.go @@ -0,0 +1,612 @@ +package monetizeapi + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// EvalCommitHash binds an evaluator's score commitment to their address: +// sha256("SCORE|SALT|ADDRESS") with a decimal score and lower-cased address. The address inside the +// preimage means a commitment cannot be copied by another evaluator and +// replayed with the original's revealed {score, salt} (Kleros whitepaper +// §4.3). CLI and controller MUST compute this identically. +func EvalCommitHash(score int64, salt, address string) string { + sum := sha256.Sum256(fmt.Appendf(nil, "%d|%s|%s", score, salt, strings.ToLower(address))) + return "0x" + hex.EncodeToString(sum[:]) +} + +// ── ServiceBounty ─────────────────────────────────────────────────────────── +// +// ServiceBounty is the demand-side inverse of a ServiceOffer. A ServiceOffer is +// standing supply that converges to one live route and stays up; a ServiceBounty +// is time-boxed demand that converges to one paid deliverable and closes. Both +// share the same money rail (x402), identity rail (ERC-8004), and controller +// plumbing, run in opposite directions. +// +// Task semantics are deliberately NOT hardcoded in this CRD. spec.task.typeRef +// points at an embedded, versioned task-type package (internal/embed/bountytasks, +// e.g.
"benchmark@v1") that owns the param schema, the eval method + tolerance, +// the OBOL eval pricing, the hardware-proof policy, and the A2UI report schema. +// New task types drop in as data — the CRD and controller never change. +// +// Verification is reputation-graded with NO validator set and NO slashing: the +// escrow releases on an accepted, ERC-8004-reputation-weighted verdict produced +// by an OBOL-paid evaluation market. See plans/bounty-ane-marketplace-design.md. + +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Namespaced,shortName=sb +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Task",type=string,JSONPath=`.spec.task.typeRef` +// +kubebuilder:printcolumn:name="Reward",type=string,JSONPath=`.spec.reward.amount` +// +kubebuilder:printcolumn:name="Verification",type=string,JSONPath=`.spec.eval.mode` +// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// ServiceBounty declares a unit of paid work (benchmark, fine-tune, serve, …) +// with an escrowed reward released on an accepted verdict. +type ServiceBounty struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ServiceBountySpec `json:"spec,omitempty"` + Status ServiceBountyStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// ServiceBountyList is the list form for kubectl/list operations. +type ServiceBountyList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ServiceBounty `json:"items"` +} + +type ServiceBountySpec struct { + // Task describes the work. spec.task.typeRef selects an embedded, + // versioned task-type package; spec.task.params is validated against + // that package's schema at admission. + // +kubebuilder:validation:Required + Task ServiceBountyTask `json:"task"` + + // Acceptance is how a submission is judged. 
Defaults come from the task + // type; the poster may tighten them. + Acceptance ServiceBountyAcceptance `json:"acceptance,omitempty"` + + // Reward is the escrowed payment released to the fulfiller on acceptance. + // +kubebuilder:validation:Required + Reward ServiceBountyReward `json:"reward"` + + // Eval configures the OBOL-paid evaluation market (a SEPARATE payment leg + // from the reward — x402 cannot splice a fee out of the reward auth). + Eval ServiceBountyEval `json:"eval,omitempty"` + + // Trust selects the reputation gate + optional refundable self-bond. No + // validator stake, no slashing — reputation (lost future income) is the + // only collateral. + Trust ServiceBountyTrust `json:"trust,omitempty"` + + // Deadline: past it with no accepted verdict → Expired → Refunded. + Deadline *metav1.Time `json:"deadline,omitempty"` + + // MaxFulfillers: 1 = single-winner (default); >1 = first-N-valid paid. + // +kubebuilder:default=1 + MaxFulfillers int64 `json:"maxFulfillers,omitempty"` +} + +// ServiceBountyTask carries the task-type reference + opaque params. The +// controller never interprets params beyond validating them against the +// resolved task-type schema. +type ServiceBountyTask struct { + // TypeRef resolves an embedded task-type package, e.g. "benchmark@v1". + // +kubebuilder:validation:Required + TypeRef string `json:"typeRef"` + + // Free-form knobs validated against the task type's param schema. + Params map[string]string `json:"params,omitempty"` + + // Target model metadata (reuses ServiceOffer's model shape). + TargetModel ServiceOfferModel `json:"targetModel,omitempty"` + + // DatasetCommit pins the eval dataset (committed root + the fraction kept + // private so a public re-run can't leak answers / enable train-on-test). + DatasetCommit ServiceBountyDatasetCommit `json:"datasetCommit,omitempty"` + + // HardwareProof strength required of the fulfiller. 
self-report is a + // reputation-backed claim (forgeable); gpu-attestation is cryptographic + // (NVIDIA CC / enclave-binding); evaluator-measured moves the throughput + // measurement onto attested evaluator hardware. + // +kubebuilder:validation:Enum=self-report;gpu-attestation;evaluator-measured + HardwareProof string `json:"hardwareProof,omitempty"` +} + +type ServiceBountyDatasetCommit struct { + // Root is a Merkle root committing the (partially private) eval dataset. + Root string `json:"root,omitempty"` + // PrivateFraction (0..1, as a string to keep schema stable) of rows kept + // secret and revealed only to sampled evaluators at eval time. + PrivateFraction string `json:"privateFraction,omitempty"` +} + +type ServiceBountyAcceptance struct { + // Method judges a submission. Benchmarks are NOT bit-exact: rerun-tolerance + // re-runs the harness and accepts a score within tolerance. The commitHash + // is integrity (anti bait-and-switch), not a determinism gate. + // +kubebuilder:validation:Enum=rerun-tolerance;harness-rerun;sla-probe;poster-manual + Method string `json:"method,omitempty"` + + // Tolerance per metric (e.g. {"mmlu":"0.01"}). Default from the task type. + Tolerance map[string]string `json:"tolerance,omitempty"` + + // CommitReveal requires evaluators to commit then reveal scores, so they + // can't pre-agree on a number. + CommitReveal bool `json:"commitReveal,omitempty"` +} + +// ServiceBountyReward mirrors the ServiceOfferPayment envelope (network + +// payTo + asset) so buy/sell/bounty all read the same way, plus the amount and +// the escrow rail. Network + PayTo are required to construct the upto +// authorization: the chain it settles on and the poster's refund address. +type ServiceBountyReward struct { + // Payment network (e.g. "base", "base-sepolia"). + Network string `json:"network,omitempty"` + + // PayTo is the poster's address: the escrow-return / refund destination. 
+ // The fulfiller payout address is bound at claim time (witness.to in the + // upto auth), not here. + // +kubebuilder:validation:Pattern=`^0x[a-fA-F0-9]{40}$` + PayTo string `json:"payTo,omitempty"` + + // Asset reuses ServiceOffer's asset shape (USDC eip3009 / OBOL permit2). + Asset ServiceOfferAsset `json:"asset,omitempty"` + + // Amount is the lump-sum reward (human units, e.g. "500.00"). + Amount string `json:"amount,omitempty"` + + // Escrow selects the x402 settlement rail + reputation-driven mode. + Escrow ServiceBountyEscrow `json:"escrow,omitempty"` +} + +type ServiceBountyEscrow struct { + // Scheme: 'upto' (live: facilitator holds a recipient-bound auth, settles + // ≤ max) or 'authCapture' (funds-locked, used above valueCap once the Go + // impl lands — x402-foundation/x402#2298). + // +kubebuilder:validation:Enum=upto;authCapture + Scheme string `json:"scheme,omitempty"` + + // Facilitator URL (our own facilitator acts as the bounded settlement + // trigger; payTo is signed into the auth so it can never redirect funds). + Facilitator string `json:"facilitator,omitempty"` + + // Mode is selected by the fulfiller's reputation: 'auto' (optimistic), + // 'facilitator-check' (deterministic re-run), 'onchain-lock' (authCapture). + // +kubebuilder:validation:Enum=auto;facilitator-check;onchain-lock + Mode string `json:"mode,omitempty"` + + // ValueCapMicros: above this the escrow must use an on-chain lock. + ValueCapMicros string `json:"valueCapMicros,omitempty"` +} + +// Eval verification modes. Verification is ON by default; skipping is an +// explicit, labeled act (--dangerously-skip-verification) — a skipped bounty +// emits no ERC-8004 validation entries and its reputation feedback is +// suppressed, so it can never be farmed for reputation. +const ( + EvalModeRequired = "required" + EvalModeDangerouslySkipped = "dangerouslySkipped" +) + +// ServiceBountyEval is the OBOL-paid evaluation market. 
Evaluators are paid for +// the WORK (pass or fail), selected by reputation (not stake), and paid in OBOL +// by default via x402 batch-settlement. +type ServiceBountyEval struct { + // K evaluators: median-of-k quorum; k≥3 whenever a probation seat is + // occupied (the median absorbs one outlier). + // +kubebuilder:default=1 + K int64 `json:"k,omitempty"` + + // Mode gates verification. 'required' (default) routes acceptance through + // the evaluator quorum once the eval market is wired — until then a poster + // verdict is recorded as PosterOverride. 'dangerouslySkipped' declares + // poster-as-judge up front: same override path, but the bounty is marked + // unverified and produces no reputation signal. + // +kubebuilder:default="required" + // +kubebuilder:validation:Enum=required;dangerouslySkipped + Mode string `json:"mode,omitempty"` + + // Selection: VRF-sampled after submission, reputation-weighted; the poster + // cannot hand-pick. + // +kubebuilder:validation:Enum=vrf-reputation-weighted;poster-manual + Selection string `json:"selection,omitempty"` + + // Payment for evaluators — a separate leg from the reward. + Payment ServiceBountyEvalPayment `json:"payment,omitempty"` +} + +type ServiceBountyEvalPayment struct { + // Asset defaults to OBOL (verification is an OBOL utility sink). + // +kubebuilder:default="OBOL" + Asset string `json:"asset,omitempty"` + + // PerEvaluator fee (human units). + PerEvaluator string `json:"perEvaluator,omitempty"` + + // FundedBy: 'poster' (separate poster-funded eval budget). + // +kubebuilder:default="poster" + FundedBy string `json:"fundedBy,omitempty"` + + // Settle: 'batch-settlement' pays all K evaluators in one tx. + // +kubebuilder:default="batch-settlement" + Settle string `json:"settle,omitempty"` +} + +type ServiceBountyTrust struct { + // ReputationGate derives the fulfiller's maxBountyValue from ERC-8004 + // getSummary (read with a curated, trusted client filter). 
+ ReputationGate bool `json:"reputationGate,omitempty"` + + // SelfBond is an OPTIONAL refundable bond the fulfiller posts from their + // OWN funds (returned on success). It is never slashed to a validator set. + SelfBond ServiceBountySelfBond `json:"selfBond,omitempty"` +} + +type ServiceBountySelfBond struct { + Required bool `json:"required,omitempty"` + Amount string `json:"amount,omitempty"` + Token string `json:"token,omitempty"` +} + +// ServiceBountyStatus mirrors the AND-rollup condition idiom used by +// ServiceOffer. Machine truth is the condition set; Phase is the human rollup. +type ServiceBountyStatus struct { + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + Phase string `json:"phase,omitempty"` + Conditions []Condition `json:"conditions,omitempty"` + + // EscrowState: Reserved | Captured | Voided (held auth at the facilitator). + EscrowState string `json:"escrowState,omitempty"` + + // WeightedScore is the reputation-weighted eval verdict (0-100). + WeightedScore int64 `json:"weightedScore,omitempty"` + + // CaptureTxHash / RefundTxHash record the settled reward or refund. + CaptureTxHash string `json:"captureTxHash,omitempty"` + RefundTxHash string `json:"refundTxHash,omitempty"` + + // ReportURI points at the SIWx/local-gated A2UI report (deliverable). + ReportURI string `json:"reportURI,omitempty"` + + // Claims are observed fulfiller bindings (single-winner is the common case, + // so claims live in status, not a separate CR). + Claims []ServiceBountyClaim `json:"claims,omitempty"` + + // EvaluatorPanel is the controller-selected seat assignment (deterministic + // per-bounty sampling from enrolled evaluators). Empty panel = open-door + // fallback (insufficient pool) — any address may evaluate, as in early v1. + EvaluatorPanel []ServiceBountyPanelSeat `json:"evaluatorPanel,omitempty"` + + // Evaluations are the eval-market verdicts promoted from the + // obol.org/eval-commit-<address> / eval-reveal-<address> annotation channel. 
+ Evaluations []ServiceBountyEvaluation `json:"evaluations,omitempty"` + + // EvalBudgetState tracks the poster-funded OBOL eval budget + // (k × perEvaluator) at the escrow gateway: Reserved | Captured | Voided. + // Evaluators are paid for the WORK, pass or fail. + EvalBudgetState string `json:"evalBudgetState,omitempty"` + + // EvalPayoutTxHash records the batch-settlement receipt for the eval leg. + EvalPayoutTxHash string `json:"evalPayoutTxHash,omitempty"` + + // LadderRecorded latches the one-shot cross-bounty ladder bookkeeping so + // repeated reconciles after quorum never double-count. + LadderRecorded bool `json:"ladderRecorded,omitempty"` + + // RevealDeadline opens once K commitments are in: every commit closes + // before any reveal opens, and a missing reveal past this instant is + // graded as a worst-case outlier (nonRevealPenalty). + RevealDeadline *metav1.Time `json:"revealDeadline,omitempty"` + + // BondState tracks the fulfiller self-bond at the escrow gateway: + // Reserved | Returned (success/honest timeout) | Forfeited (rejected work, + // offsets the poster's burned eval budget). + BondState string `json:"bondState,omitempty"` + + // PanelSeed records the randomness source the evaluator panel was drawn + // from, so the sampling is auditable (drand round, raw randomness, sig). + PanelSeed *ServiceBountyPanelSeed `json:"panelSeed,omitempty"` + + // Escalation is the second-round eval state opened when the first-round + // quorum diverges beyond the task's escalation epsilon. + Escalation *ServiceBountyEscalation `json:"escalation,omitempty"` + + // EscrowSpender is the facilitator address Permit2 vouchers must name as + // the only executor (Receipt.Spender echoed into status for signers). + EscrowSpender string `json:"escrowSpender,omitempty"` +} + +// ServiceBountyPanelSeed is the auditable randomness behind a panel draw. +type ServiceBountyPanelSeed struct { + // Source names the randomness origin (e.g. drand, local-dev). 
+ Source string `json:"source"` + + // Round is the drand round the randomness came from. + Round uint64 `json:"round,omitempty"` + + // Randomness is the beacon output the panel sampling was keyed on. + Randomness string `json:"randomness,omitempty"` + + // Signature is the beacon signature proving the randomness. + Signature string `json:"signature,omitempty"` +} + +// ServiceBountyEscalation is one escalation round: a fresh, larger panel +// re-evaluates the same submission with its own commit-reveal cycle and its +// own eval budget. +type ServiceBountyEscalation struct { + // Round is the escalation round number (1 = first escalation). + Round int `json:"round"` + + // Reason records why the escalation opened (e.g. quorum divergence). + Reason string `json:"reason,omitempty"` + + // Panel is the escalation-round seat assignment. + Panel []ServiceBountyPanelSeat `json:"panel,omitempty"` + + // Evaluations are the escalation round's commit-reveal records. + Evaluations []ServiceBountyEvaluation `json:"evaluations,omitempty"` + + // RevealDeadline is the escalation round's commit→reveal cutoff. + RevealDeadline *metav1.Time `json:"revealDeadline,omitempty"` + + // VoucherDeadline is when the escalation eval-budget voucher expires. + VoucherDeadline *metav1.Time `json:"voucherDeadline,omitempty"` + + // BudgetState tracks the escalation eval budget at the escrow gateway: + // Reserved | Captured | Voided. + BudgetState string `json:"budgetState,omitempty"` +} + +// Panel seat kinds (design doc §11.4): full and probation seats count in the +// median-of-k quorum; shadow seats are graded against the median but never +// counted (the free reputation on-ramp). +const ( + PanelSeatFull = "full" + PanelSeatProbation = "probation" + PanelSeatShadow = "shadow" +) + +// ServiceBountyPanelSeat is one selected evaluator seat. +type ServiceBountyPanelSeat struct { + // Address is the enrolled evaluator's address. 
+ Address string `json:"address,omitempty"` + + // Seat: full | probation | shadow. + Seat string `json:"seat,omitempty"` +} + +// ServiceBountyEvaluation is one evaluator's commit-reveal record. WithinBand +// is the per-bounty ladder bookkeeping hook: divergence from the quorum median +// (or a missing/invalid reveal) is what future reputation feedback keys on. +type ServiceBountyEvaluation struct { + // Address is the evaluator's payout/identity address (annotation key suffix). + Address string `json:"address,omitempty"` + + // CommitHash = EvalCommitHash(score, salt, address), promoted first-write-wins. + CommitHash string `json:"commitHash,omitempty"` + + // Score is the revealed 0-100 verdict (ERC-8004 validationResponse semantics). + Score int64 `json:"score,omitempty"` + + // RevealedAt records when a valid reveal was promoted. + RevealedAt *metav1.Time `json:"revealedAt,omitempty"` + + // WithinBand is false for NonReveal/BadReveal and for revealed scores + // outside the outlier band around the quorum median. + WithinBand bool `json:"withinBand,omitempty"` + + // Phase: Committed | Revealed | BadReveal | NonReveal. + Phase string `json:"phase,omitempty"` + + // Seat mirrors the panel seat kind (full | probation | shadow); empty in + // open-door mode. + Seat string `json:"seat,omitempty"` + + // Paid marks inclusion in the eval-budget batch settlement (counting + // seats that revealed validly; shadows evaluate free). + Paid bool `json:"paid,omitempty"` + + // ValidationTxHash is the evaluator-submitted ERC-8004 validationResponse + // transaction, recorded as provenance (the evaluator's OWN wallet signs; + // the controller never does). + ValidationTxHash string `json:"validationTxHash,omitempty"` + + // Grounded marks a verdict backed by an on-chain ERC-8004 validation + // entry observed for this bounty's eval-request hash — the chain-anchored + // reputation signal, as opposed to an annotation-only reveal. 
+ Grounded bool `json:"grounded,omitempty"` +} + +type ServiceBountyClaim struct { + FulfillerAddress string `json:"fulfillerAddress,omitempty"` + ClaimedAt *metav1.Time `json:"claimedAt,omitempty"` + // CommitHash binds the worker to a specific model + outputs (anti + // bait-and-switch), revealed at submit. + CommitHash string `json:"commitHash,omitempty"` + // Phase: Claimed | Submitted | Verified | Rejected. + Phase string `json:"phase,omitempty"` +} + +// ── deepcopy (hand-written to match controller-gen idioms in +// zz_generated.deepcopy.go; the Reward/Eval/Trust sub-trees are pure value +// structs so the shallow `*out = *in` is already a deep copy for them) ───── + +func (in *ServiceBounty) DeepCopyInto(out *ServiceBounty) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +func (in *ServiceBounty) DeepCopy() *ServiceBounty { + if in == nil { + return nil + } + out := new(ServiceBounty) + in.DeepCopyInto(out) + return out +} + +func (in *ServiceBounty) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *ServiceBountyList) DeepCopyInto(out *ServiceBountyList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + l, m := &in.Items, &out.Items + *m = make([]ServiceBounty, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } +} + +func (in *ServiceBountyList) DeepCopy() *ServiceBountyList { + if in == nil { + return nil + } + out := new(ServiceBountyList) + in.DeepCopyInto(out) + return out +} + +func (in *ServiceBountyList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +func (in *ServiceBountySpec) DeepCopyInto(out *ServiceBountySpec) { + *out = *in + in.Task.DeepCopyInto(&out.Task) + in.Acceptance.DeepCopyInto(&out.Acceptance) + out.Reward = in.Reward + 
out.Eval = in.Eval + out.Trust = in.Trust + if in.Deadline != nil { + l, m := &in.Deadline, &out.Deadline + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyTask) DeepCopyInto(out *ServiceBountyTask) { + *out = *in + if in.Params != nil { + out.Params = make(map[string]string, len(in.Params)) + for k, v := range in.Params { + out.Params[k] = v + } + } + out.TargetModel = in.TargetModel + out.DatasetCommit = in.DatasetCommit +} + +func (in *ServiceBountyAcceptance) DeepCopyInto(out *ServiceBountyAcceptance) { + *out = *in + if in.Tolerance != nil { + out.Tolerance = make(map[string]string, len(in.Tolerance)) + for k, v := range in.Tolerance { + out.Tolerance[k] = v + } + } +} + +func (in *ServiceBountyStatus) DeepCopyInto(out *ServiceBountyStatus) { + *out = *in + if in.Conditions != nil { + l, m := &in.Conditions, &out.Conditions + *m = make([]Condition, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.Claims != nil { + l, m := &in.Claims, &out.Claims + *m = make([]ServiceBountyClaim, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.EvaluatorPanel != nil { + out.EvaluatorPanel = make([]ServiceBountyPanelSeat, len(in.EvaluatorPanel)) + copy(out.EvaluatorPanel, in.EvaluatorPanel) + } + if in.Evaluations != nil { + l, m := &in.Evaluations, &out.Evaluations + *m = make([]ServiceBountyEvaluation, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.RevealDeadline != nil { + l, m := &in.RevealDeadline, &out.RevealDeadline + *m = (*l).DeepCopy() + } + if in.PanelSeed != nil { + out.PanelSeed = new(ServiceBountyPanelSeed) + *out.PanelSeed = *in.PanelSeed + } + if in.Escalation != nil { + out.Escalation = new(ServiceBountyEscalation) + in.Escalation.DeepCopyInto(out.Escalation) + } +} + +func (in *ServiceBountyEscalation) DeepCopyInto(out *ServiceBountyEscalation) { + *out = *in + if in.Panel != nil { + out.Panel = make([]ServiceBountyPanelSeat, len(in.Panel)) + copy(out.Panel, 
in.Panel) + } + if in.Evaluations != nil { + l, m := &in.Evaluations, &out.Evaluations + *m = make([]ServiceBountyEvaluation, len(*l)) + for i := range *l { + (*l)[i].DeepCopyInto(&(*m)[i]) + } + } + if in.RevealDeadline != nil { + l, m := &in.RevealDeadline, &out.RevealDeadline + *m = (*l).DeepCopy() + } + if in.VoucherDeadline != nil { + l, m := &in.VoucherDeadline, &out.VoucherDeadline + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyEvaluation) DeepCopyInto(out *ServiceBountyEvaluation) { + *out = *in + if in.RevealedAt != nil { + l, m := &in.RevealedAt, &out.RevealedAt + *m = (*l).DeepCopy() + } +} + +func (in *ServiceBountyClaim) DeepCopyInto(out *ServiceBountyClaim) { + *out = *in + if in.ClaimedAt != nil { + l, m := &in.ClaimedAt, &out.ClaimedAt + *m = (*l).DeepCopy() + } +} diff --git a/internal/monetizeapi/types.go b/internal/monetizeapi/types.go index 18db62ae..7ab6c077 100644 --- a/internal/monetizeapi/types.go +++ b/internal/monetizeapi/types.go @@ -1,6 +1,7 @@ package monetizeapi import ( + "crypto/md5" "fmt" "strings" "time" @@ -20,16 +21,20 @@ const ( Version = "v1alpha1" ServiceOfferKind = "ServiceOffer" + ServiceBountyKind = "ServiceBounty" RegistrationRequestKind = "RegistrationRequest" PurchaseRequestKind = "PurchaseRequest" AgentKind = "Agent" AgentIdentityKind = "AgentIdentity" + EvaluatorEnrollmentKind = "EvaluatorEnrollment" ServiceOfferResource = "serviceoffers" + ServiceBountyResource = "servicebounties" RegistrationRequestResource = "registrationrequests" PurchaseRequestResource = "purchaserequests" AgentResource = "agents" AgentIdentityResource = "agentidentities" + EvaluatorEnrollmentResource = "evaluatorenrollments" // Default identity used for the operator's public ERC-8004 registration // file. The registration file can contain multiple per-chain registrations. 
@@ -42,10 +47,22 @@ const ( AgentPhaseProvisioning = "Provisioning" AgentPhaseReady = "Ready" AgentPhaseFailed = "Failed" + + // SkillBundleKey is the binaryData key in a type=skill offer's bundle + // ConfigMap that holds the gzipped skill bundle bytes. + SkillBundleKey = "bundle.tar.gz" + // MaxSkillBundleBytes caps the gzipped skill bundle size. The artifact + // rides a ConfigMap (1MiB object cap) and must leave room for base64 + // expansion plus object metadata, so the cap applies to the compressed + // bytes. Enforced at the CLI before the ConfigMap is written and at + // the controller before the bundle server is published. + MaxSkillBundleBytes = 900000 ) var ( ServiceOfferGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: ServiceOfferResource} + ServiceBountyGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: ServiceBountyResource} + EvaluatorEnrollmentGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: EvaluatorEnrollmentResource} RegistrationRequestGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: RegistrationRequestResource} PurchaseRequestGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: PurchaseRequestResource} AgentGVR = schema.GroupVersionResource{Group: Group, Version: Version, Resource: AgentResource} @@ -98,13 +115,20 @@ type ServiceOfferList struct { Items []ServiceOffer `json:"items"` } +// The spec-level CEL rule below mirrors the per-method payment rules: a +// type=skill offer without spec.skill is rejected at admission time, +// independent of the CLI. (Kept detached from the type's doc comment so +// it does not leak into the generated schema description.) + +// +kubebuilder:validation:XValidation:rule="self.type != 'skill' || has(self.skill)",message="spec.skill is required when type=skill" type ServiceOfferSpec struct { // Service type. 
'inference' enables model management; 'http' for any HTTP // service; 'agent' references an Agent CR via spec.agent.ref and the // controller derives upstream + model + skills from the agent's status; - // 'dataset' sells a versioned dataset artifact via spec.dataset. + // 'dataset' sells a versioned dataset artifact via spec.dataset; + // 'skill' sells a downloadable skill bundle via spec.skill. // +kubebuilder:default="http" - // +kubebuilder:validation:Enum=inference;fine-tuning;http;agent;dataset + // +kubebuilder:validation:Enum=inference;fine-tuning;http;agent;dataset;skill Type string `json:"type,omitempty"` // Required when type='agent'. The controller resolves spec.agent.ref to @@ -118,6 +142,14 @@ type ServiceOfferSpec struct { // controller surfaces these in the 402 response's extra.dataset block. Dataset ServiceOfferDataset `json:"dataset,omitempty"` + // Required when type='skill' (enforced by the spec-level XValidation + // rule). Describes the downloadable skill bundle being sold: identity + // (name@version), integrity hash, and the ConfigMap carrying the + // artifact. The controller renders a static bundle server from this + // block and refuses to publish when the ConfigMap bytes do not match + // sha256. + Skill ServiceOfferSkill `json:"skill,omitempty"` + // LLM model metadata. Required when the upstream serves an LLM. Model ServiceOfferModel `json:"model,omitempty"` @@ -189,6 +221,43 @@ type ServiceOfferDataset struct { SizeBytes int64 `json:"sizeBytes,omitempty"` } +// ServiceOfferSkill is populated when Spec.Type == "skill". It pins the +// exact artifact being sold: the bundle identity (name@version), the +// sha256 of the gzipped tar bytes, and the ConfigMap — in the offer's +// namespace — whose binaryData[SkillBundleKey] holds those bytes. 
The +// controller verifies the hash before publishing the bundle server and +// the verifier surfaces name/version/sha256 in the 402 response's +// extra.skill block so buyers can check the download offline. +type ServiceOfferSkill struct { + // Skill name (e.g. buy-x402). Combined with Version it forms the + // skill ref <name>@<version> used by ERC-8004 feedback tags. + // +kubebuilder:validation:Required + // +kubebuilder:validation:Pattern=`^[a-z0-9][a-z0-9-]*$` + // +kubebuilder:validation:MaxLength=64 + Name string `json:"name"` + // Skill version (e.g. 0.1.0). + // +kubebuilder:validation:Required + // +kubebuilder:validation:Pattern=`^[A-Za-z0-9][A-Za-z0-9._-]*$` + // +kubebuilder:validation:MaxLength=64 + Version string `json:"version"` + // Lowercase hex sha256 of the gzipped bundle bytes (the exact bytes + // stored in the bundle ConfigMap and served to buyers). + // +kubebuilder:validation:Required + // +kubebuilder:validation:Pattern=`^[a-f0-9]{64}$` + SHA256 string `json:"sha256"` + // Name of a ConfigMap in the offer's namespace whose + // binaryData["bundle.tar.gz"] is the artifact (key: SkillBundleKey). + // +kubebuilder:validation:Required + // +kubebuilder:validation:MaxLength=253 + BundleConfigMap string `json:"bundleConfigMap"` + // Human-friendly display name for catalog surfaces. + // +kubebuilder:validation:MaxLength=128 + DisplayName string `json:"displayName,omitempty"` + // Short human-readable description for catalog surfaces. + // +kubebuilder:validation:MaxLength=1024 + Description string `json:"description,omitempty"` +} + type ServiceOfferModel struct { // Model identifier (e.g. qwen3.5:35b). // +kubebuilder:validation:Required @@ -217,31 +286,90 @@ type ServiceOfferUpstream struct { HealthPath string `json:"healthPath,omitempty"` } +// ServiceOfferPayment describes how buyers pay for the offer. Two methods +// are supported, selected by Method: +// +// - "crypto" (default): x402 on-chain stablecoin settlement. 
Network and +// PayTo are required and PayTo must be a 0x EVM address. +// - "card": an MPP credit-card method (Stripe stripe.charge). Card is +// required; funds settle off-chain into the configured Stripe account +// and Network/PayTo do not apply. +// +// The per-method required fields are enforced by the XValidation rules +// below so the API server rejects malformed offers at admission time, +// independent of the CLI. The CEL guards short-circuit on Method so the +// 0x/account checks are only evaluated for the relevant method. +// +// +kubebuilder:validation:XValidation:rule="self.method != 'card' ? has(self.payTo) : true",message="payment.payTo is required when payment.method is crypto" +// +kubebuilder:validation:XValidation:rule="self.method != 'card' ? (has(self.network) && size(self.network) > 0) : true",message="payment.network is required when payment.method is crypto" +// +kubebuilder:validation:XValidation:rule="self.method == 'card' ? (has(self.card) && has(self.card.account)) : true",message="payment.card.account is required when payment.method is card" type ServiceOfferPayment struct { - // x402 payment scheme. + // Payment method. "crypto" gates with x402 on-chain stablecoin + // settlement (default; preserves existing behavior). "card" gates with + // an MPP credit-card method (Stripe) that settles off-chain into + // spec.payment.card.account. + // +kubebuilder:default="crypto" + // +kubebuilder:validation:Enum=crypto;card + Method string `json:"method,omitempty"` + // x402 payment scheme. Only meaningful when method=crypto. // +kubebuilder:default="exact" // +kubebuilder:validation:Enum=exact Scheme string `json:"scheme,omitempty"` // Chain identifier for payments (human-friendly). Reconciler resolves - // to CAIP-2 format (e.g., "base-sepolia" → "eip155:84532"). - // +kubebuilder:validation:Required - Network string `json:"network"` - // USDC recipient wallet address (x402: payTo). 
- // +kubebuilder:validation:Required + // to CAIP-2 format (e.g., "base-sepolia" → "eip155:84532"). Required + // when method=crypto (enforced by the payment XValidation rules); + // unused for card payments. + Network string `json:"network,omitempty"` + // USDC recipient wallet address (x402: payTo). Required and 0x-format + // when method=crypto (enforced by the payment XValidation rules); + // unused for card payments. // +kubebuilder:validation:Pattern=`^0x[0-9a-fA-F]{40}$` - PayTo string `json:"payTo"` + PayTo string `json:"payTo,omitempty"` // Payment validity window in seconds (x402: maxTimeoutSeconds). // +kubebuilder:default=300 MaxTimeoutSeconds int64 `json:"maxTimeoutSeconds,omitempty"` // Optional token metadata override for x402 settlement. When omitted, - // the verifier uses the chain default asset. + // the verifier uses the chain default asset. Crypto only. Asset ServiceOfferAsset `json:"asset,omitempty"` - // Pricing table with per-unit prices in USDC (human-readable decimals). - // Which fields are applicable depends on the workload type. + // Card payment terms. Required when method=card; ignored otherwise. + Card *ServiceOfferCardPayment `json:"card,omitempty"` + // Pricing table with per-unit prices (human-readable decimals). For + // crypto the unit is the settlement token (USDC by default); for card + // the unit is payment.card.currency. Which fields are applicable + // depends on the workload type. // +kubebuilder:validation:Required Price ServiceOfferPriceTable `json:"price"` } +// ServiceOfferCardPayment holds the off-chain credit-card settlement terms +// used when ServiceOfferPayment.Method == "card". It is the card-method +// analog of Network/PayTo: instead of a chain plus a 0x recipient, funds +// settle through a payment provider (Stripe today, via the MPP +// stripe.charge method) into Account. +type ServiceOfferCardPayment struct { + // Card payment provider. 
Only "stripe" is supported today (MPP + // stripe.charge via Shared Payment Tokens). + // +kubebuilder:default="stripe" + // +kubebuilder:validation:Enum=stripe + Provider string `json:"provider,omitempty"` + // Destination account that receives settled card funds. For Stripe this + // is the connected/destination account id (e.g. "acct_1A2b3C4d5E6f7G"). + // +kubebuilder:validation:Pattern=`^acct_[A-Za-z0-9]+$` + Account string `json:"account,omitempty"` + // ISO-4217 currency the card is charged in. Default "usd". + // +kubebuilder:default="usd" + // +kubebuilder:validation:Pattern=`^[a-z]{3}$` + Currency string `json:"currency,omitempty"` + // Optional Stripe "machine payments" network id, surfaced in the 402 + // challenge's extra block so MPP card clients know where to mint a + // Shared Payment Token. + NetworkID string `json:"networkId,omitempty"` + // Accepted payment-method types advertised to the client. Defaults to + // ["card"] at the gateway when empty. + // +kubebuilder:validation:MaxItems=16 + PaymentMethodTypes []string `json:"paymentMethodTypes,omitempty"` +} + type ServiceOfferAsset struct { // ERC-20 contract address. // +kubebuilder:validation:Pattern=`^0x[0-9a-fA-F]{40}$` @@ -457,6 +585,14 @@ func (o *ServiceOffer) IsDataset() bool { return o.Spec.Type == "dataset" } +// IsSkill reports whether the offer sells a downloadable skill bundle. +// Type=="skill" is the only signal — Spec.Skill must also be populated +// for a usable offer, but admission validation (the spec-level CEL rule) +// enforces that. +func (o *ServiceOffer) IsSkill() bool { + return o.Spec.Type == "skill" +} + // IsDraining reports whether spec.drainAt has been set. 
Drained offers // transition through three phases: pre-drain (DrainAt nil), draining // (DrainAt set, now < DrainEndsAt), and drain-expired (DrainAt set, @@ -494,6 +630,42 @@ func (o *ServiceOffer) DrainExpired(now time.Time) bool { return !now.Before(end) } +// maxK8sNameLen is the maximum length for a Kubernetes resource name +// (DNS subdomain). +const maxK8sNameLen = 253 + +// maxK8sServiceNameLen is the maximum length for a Kubernetes Service +// name (RFC 1035 label) and for label VALUES (the workload name doubles +// as the children's "app" label). +const maxK8sServiceNameLen = 63 + +// SkillBundleWorkloadName returns the deterministic name of the bundle +// server children (Deployment/Service/meta ConfigMap) rendered for a +// type=skill offer: "so-<name>-bundle". It lives in monetizeapi so the +// CLI (which pins spec.upstream.service to it), the controller (which +// renders the children and rejects spoofed upstreams), and the x402 +// route source share one definition without an import cycle. Mirrors +// serviceoffercontroller.safeName, but caps at the 63-char RFC 1035 +// Service-name/label limit (not the 253-char object-name limit — the +// name is also a Service name and an "app" label value): longer offer +// names are truncated with a short hash appended to avoid collisions.
+func SkillBundleWorkloadName(offerName string) string { + const ( + prefix = "so-" + suffix = "-bundle" + ) + full := prefix + offerName + suffix + if len(full) <= maxK8sServiceNameLen { + return full + } + hash := fmt.Sprintf("%x", md5.Sum([]byte(offerName)))[:8] + maxName := maxK8sServiceNameLen - len(prefix) - len(suffix) - 1 - len(hash) // 1 for the dash before hash + if maxName < 1 { + maxName = 1 + } + return prefix + offerName[:maxName] + "-" + hash + suffix +} + // ── PurchaseRequest ───────────────────────────────────────────────────────── // +kubebuilder:object:root=true diff --git a/internal/monetizeapi/zz_generated.deepcopy.go b/internal/monetizeapi/zz_generated.deepcopy.go index a541cc81..ba549c53 100644 --- a/internal/monetizeapi/zz_generated.deepcopy.go +++ b/internal/monetizeapi/zz_generated.deepcopy.go @@ -8,7 +8,7 @@ package monetizeapi import ( "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -253,6 +253,51 @@ func (in *Condition) DeepCopy() *Condition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EvaluatorAttestation) DeepCopyInto(out *EvaluatorAttestation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorAttestation. +func (in *EvaluatorAttestation) DeepCopy() *EvaluatorAttestation { + if in == nil { + return nil + } + out := new(EvaluatorAttestation) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorEnrollmentSpec. 
+func (in *EvaluatorEnrollmentSpec) DeepCopy() *EvaluatorEnrollmentSpec { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorEnrollmentStatus. +func (in *EvaluatorEnrollmentStatus) DeepCopy() *EvaluatorEnrollmentStatus { + if in == nil { + return nil + } + out := new(EvaluatorEnrollmentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvaluatorLadderRecord. +func (in *EvaluatorLadderRecord) DeepCopy() *EvaluatorLadderRecord { + if in == nil { + return nil + } + out := new(EvaluatorLadderRecord) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PurchaseAutoRefill) DeepCopyInto(out *PurchaseAutoRefill) { *out = *in @@ -477,6 +522,215 @@ func (in *RegistrationRequestStatus) DeepCopy() *RegistrationRequestStatus { return out } +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyAcceptance. +func (in *ServiceBountyAcceptance) DeepCopy() *ServiceBountyAcceptance { + if in == nil { + return nil + } + out := new(ServiceBountyAcceptance) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyClaim. +func (in *ServiceBountyClaim) DeepCopy() *ServiceBountyClaim { + if in == nil { + return nil + } + out := new(ServiceBountyClaim) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServiceBountyDatasetCommit) DeepCopyInto(out *ServiceBountyDatasetCommit) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyDatasetCommit. +func (in *ServiceBountyDatasetCommit) DeepCopy() *ServiceBountyDatasetCommit { + if in == nil { + return nil + } + out := new(ServiceBountyDatasetCommit) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEscalation. +func (in *ServiceBountyEscalation) DeepCopy() *ServiceBountyEscalation { + if in == nil { + return nil + } + out := new(ServiceBountyEscalation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyEscrow) DeepCopyInto(out *ServiceBountyEscrow) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEscrow. +func (in *ServiceBountyEscrow) DeepCopy() *ServiceBountyEscrow { + if in == nil { + return nil + } + out := new(ServiceBountyEscrow) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyEval) DeepCopyInto(out *ServiceBountyEval) { + *out = *in + out.Payment = in.Payment +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEval. +func (in *ServiceBountyEval) DeepCopy() *ServiceBountyEval { + if in == nil { + return nil + } + out := new(ServiceBountyEval) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServiceBountyEvalPayment) DeepCopyInto(out *ServiceBountyEvalPayment) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEvalPayment. +func (in *ServiceBountyEvalPayment) DeepCopy() *ServiceBountyEvalPayment { + if in == nil { + return nil + } + out := new(ServiceBountyEvalPayment) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyEvaluation. +func (in *ServiceBountyEvaluation) DeepCopy() *ServiceBountyEvaluation { + if in == nil { + return nil + } + out := new(ServiceBountyEvaluation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyPanelSeat) DeepCopyInto(out *ServiceBountyPanelSeat) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyPanelSeat. +func (in *ServiceBountyPanelSeat) DeepCopy() *ServiceBountyPanelSeat { + if in == nil { + return nil + } + out := new(ServiceBountyPanelSeat) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountyPanelSeed) DeepCopyInto(out *ServiceBountyPanelSeed) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyPanelSeed. +func (in *ServiceBountyPanelSeed) DeepCopy() *ServiceBountyPanelSeed { + if in == nil { + return nil + } + out := new(ServiceBountyPanelSeed) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServiceBountyReward) DeepCopyInto(out *ServiceBountyReward) { + *out = *in + out.Asset = in.Asset + out.Escrow = in.Escrow +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyReward. +func (in *ServiceBountyReward) DeepCopy() *ServiceBountyReward { + if in == nil { + return nil + } + out := new(ServiceBountyReward) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceBountySelfBond) DeepCopyInto(out *ServiceBountySelfBond) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountySelfBond. +func (in *ServiceBountySelfBond) DeepCopy() *ServiceBountySelfBond { + if in == nil { + return nil + } + out := new(ServiceBountySelfBond) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountySpec. +func (in *ServiceBountySpec) DeepCopy() *ServiceBountySpec { + if in == nil { + return nil + } + out := new(ServiceBountySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyStatus. +func (in *ServiceBountyStatus) DeepCopy() *ServiceBountyStatus { + if in == nil { + return nil + } + out := new(ServiceBountyStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyTask. +func (in *ServiceBountyTask) DeepCopy() *ServiceBountyTask { + if in == nil { + return nil + } + out := new(ServiceBountyTask) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServiceBountyTrust) DeepCopyInto(out *ServiceBountyTrust) { + *out = *in + out.SelfBond = in.SelfBond +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceBountyTrust. +func (in *ServiceBountyTrust) DeepCopy() *ServiceBountyTrust { + if in == nil { + return nil + } + out := new(ServiceBountyTrust) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServiceOffer) DeepCopyInto(out *ServiceOffer) { *out = *in @@ -570,6 +824,26 @@ func (in *ServiceOfferAsset) DeepCopy() *ServiceOfferAsset { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceOfferCardPayment) DeepCopyInto(out *ServiceOfferCardPayment) { + *out = *in + if in.PaymentMethodTypes != nil { + in, out := &in.PaymentMethodTypes, &out.PaymentMethodTypes + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceOfferCardPayment. +func (in *ServiceOfferCardPayment) DeepCopy() *ServiceOfferCardPayment { + if in == nil { + return nil + } + out := new(ServiceOfferCardPayment) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ServiceOfferDataset) DeepCopyInto(out *ServiceOfferDataset) { *out = *in @@ -636,6 +910,11 @@ func (in *ServiceOfferModel) DeepCopy() *ServiceOfferModel { func (in *ServiceOfferPayment) DeepCopyInto(out *ServiceOfferPayment) { *out = *in out.Asset = in.Asset + if in.Card != nil { + in, out := &in.Card, &out.Card + *out = new(ServiceOfferCardPayment) + (*in).DeepCopyInto(*out) + } out.Price = in.Price } @@ -721,14 +1000,30 @@ func (in *ServiceOfferService) DeepCopy() *ServiceOfferService { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceOfferSkill) DeepCopyInto(out *ServiceOfferSkill) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceOfferSkill. +func (in *ServiceOfferSkill) DeepCopy() *ServiceOfferSkill { + if in == nil { + return nil + } + out := new(ServiceOfferSkill) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServiceOfferSpec) DeepCopyInto(out *ServiceOfferSpec) { *out = *in out.Agent = in.Agent out.Dataset = in.Dataset + out.Skill = in.Skill out.Model = in.Model out.Upstream = in.Upstream - out.Payment = in.Payment + in.Payment.DeepCopyInto(&out.Payment) if in.Provenance != nil { in, out := &in.Provenance, &out.Provenance *out = make(map[string]string, len(*in)) diff --git a/internal/openclaw/monetize_integration_test.go b/internal/openclaw/monetize_integration_test.go index 5133582f..22538818 100644 --- a/internal/openclaw/monetize_integration_test.go +++ b/internal/openclaw/monetize_integration_test.go @@ -70,6 +70,13 @@ func cleanupPurchaseRequestsForTest(t *testing.T, cfg *config.Config) { t.Helper() namespace := agentNamespace(cfg) + // Delete FIRST, then strip finalizers. 
The controller re-adds its + // finalizer to any live PurchaseRequest, so clearing before delete is a + // no-op race; and a deleting purchase with unspent auths intentionally + // drains (stays Terminating) — the finalizer strip is the test's + // force-path past that drain. + _, _ = obolRunErr(cfg, "kubectl", "delete", "purchaserequests.obol.org", + "-n", namespace, "--all", "--ignore-not-found", "--wait=false") if out, err := obolRunErr(cfg, "kubectl", "get", "purchaserequests.obol.org", "-n", namespace, "-o", "name"); err == nil { for _, name := range strings.Fields(out) { @@ -77,8 +84,15 @@ func cleanupPurchaseRequestsForTest(t *testing.T, cfg *config.Config) { "-n", namespace, "--type=merge", "-p", `{"metadata":{"finalizers":[]}}`) } } - _, _ = obolRunErr(cfg, "kubectl", "delete", "purchaserequests.obol.org", - "-n", namespace, "--all", "--ignore-not-found", "--wait=false") + for i := 0; i < 12; i++ { + out, err := obolRunErr(cfg, "kubectl", "get", "purchaserequests.obol.org", + "-n", namespace, "-o", "name") + if err == nil && strings.TrimSpace(out) == "" { + return + } + time.Sleep(5 * time.Second) + } + t.Logf("warning: PurchaseRequests still present in %s after cleanup", namespace) } // getServiceOffer returns the ServiceOffer as a parsed JSON map. @@ -1101,18 +1115,30 @@ req = urllib.request.Request( }, ) -try: - with urllib.request.urlopen(req, timeout=180) as resp: +# Transport-level errors (connection refused) are retried: the controller +# may roll the LiteLLM deployment to publish the paid/ route right +# before the first paid call, and the Service can briefly point at a +# terminating pod. HTTP errors are NOT retried — their status codes are +# what the test asserts on. 
+import time +for attempt in range(12): + try: + with urllib.request.urlopen(req, timeout=180) as resp: + sys.stdout.write(json.dumps({ + "status": resp.status, + "body": resp.read().decode(), + })) + break + except urllib.error.HTTPError as err: sys.stdout.write(json.dumps({ - "status": resp.status, - "body": resp.read().decode(), + "status": err.code, + "body": err.read().decode(), })) -except urllib.error.HTTPError as err: - sys.stdout.write(json.dumps({ - "status": err.code, - "body": err.read().decode(), - })) - sys.exit(1) + sys.exit(1) + except urllib.error.URLError: + if attempt == 11: + raise + time.sleep(5) `, model, prompt, "http://litellm.llm.svc.cluster.local:4000/v1/chat/completions", "Bearer "+masterKey) out, err := execInAgentErr(cfg, "python3", "-c", script) @@ -3886,6 +3912,13 @@ func TestIntegration_SellBuySidecar_OBOLPermit2(t *testing.T) { } anvil.FundETH(t, agentWallet, big.NewInt(1e18)) anvil.MintMintableERC20(t, obolToken, anvil.Accounts[0].PrivateKey, agentWallet, new(big.Int).Mul(big.NewInt(10), big.NewInt(1e18))) + // One-time approve(Permit2, max) the buyer wallet owner does on a real + // chain. The fork token is not the registry's canonical OBOL address, so + // the 402 never advertises eip2612GasSponsoring (anti-spoof check in + // internal/x402/chains.go) and buy.py's allowance preflight requires a + // real allowance. Earlier green runs only skipped that preflight when + // the allowance read raced eRPC pin propagation and missed the fork. 
+ anvil.ApprovePermit2ViaImpersonation(t, obolToken, agentWallet) t.Logf("funded agent wallet %s with 10 OBOL on fork token %s", agentWallet, obolToken) originalERPCConfig := getERPCConfigYAML(t, cfg) diff --git a/internal/research/groupauth/groupauth.go b/internal/research/groupauth/groupauth.go new file mode 100644 index 00000000..146b1cc2 --- /dev/null +++ b/internal/research/groupauth/groupauth.go @@ -0,0 +1,323 @@ +// Package groupauth is the membership/OAuth layer for decentralized +// auto-research groups: it issues and verifies the tokens that gate a +// research program's PRIVATE collective knowledge base. +// +// It is an RFC 8628-style device-authorization flow adapted from the +// Darkbloom / d-inference coordinator (coordinator/api/device_auth.go) — +// same shape, research-group semantics: +// +// 1. A worker agent calls RequestCode() → device_code + user_code +// 2. The PROGRAM OWNER approves the user_code → links it to the group +// (this is the membership decision — "coordination at the OAuth level") +// 3. The worker polls Poll(device_code) → a member token +// 4. The worker presents the token to the program's private +// knowledge-base service → read/write the group-private KB +// +// The store is in-memory and dependency-free on purpose: a research +// program is a small, single-coordinator group, and keeping this package +// free of cluster/store deps lets the serviceoffer-controller embed it +// (or a sidecar mount it) without pulling the whole coordinator in. Tokens +// are persisted only as SHA-256 hashes; the raw token is returned once. +package groupauth + +import ( + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "errors" + "math/big" + "strings" + "sync" + "time" +) + +const ( + // CodeExpiry is how long an unused device code stays valid. + CodeExpiry = 15 * time.Minute + // PollInterval is the minimum seconds a worker should wait between polls.
+ PollInterval = 5 + // tokenPrefix namespaces member tokens (mirrors the d-inference prefix + // convention so tokens are self-identifying in logs). + tokenPrefix = "obol-research-mt-" + // userCharset excludes ambiguous glyphs (0/O, 1/I/L). + userCharset = "ABCDEFGHJKMNPQRSTUVWXYZ23456789" +) + +// Status values for a device code. +const ( + StatusPending = "pending" + StatusApproved = "approved" +) + +var ( + // ErrNotFound is returned when a device code or token is unknown. + ErrNotFound = errors.New("groupauth: not found") + // ErrExpired is returned when a device code has passed its expiry. + ErrExpired = errors.New("groupauth: device code expired") + // ErrAlreadyUsed is returned when approving a non-pending code. + ErrAlreadyUsed = errors.New("groupauth: code already used") +) + +// deviceCode is an in-flight authorization request. +type deviceCode struct { + DeviceCode string + UserCode string + Status string + GroupID string // the ResearchProgram id, set on approval + WorkerID string // optional self-declared worker label + ExpiresAt time.Time +} + +// memberToken is an issued, hashed membership credential. +type memberToken struct { + TokenHash string + GroupID string + Label string + IssuedAt time.Time + Active bool +} + +// CodeGrant is returned to a worker starting a login. +type CodeGrant struct { + DeviceCode string `json:"device_code"` + UserCode string `json:"user_code"` + ExpiresIn int `json:"expires_in"` + Interval int `json:"interval"` +} + +// PollResult is returned while a worker polls for approval. +type PollResult struct { + // Status is "authorization_pending" until approved, then "authorized". + Status string `json:"status"` + // Token is set only once, when Status first becomes "authorized". + Token string `json:"token,omitempty"` + GroupID string `json:"group_id,omitempty"` +} + +// Authority issues and verifies a single research group's membership +// tokens. One Authority per ResearchProgram; safe for concurrent use. 
+type Authority struct { + mu sync.Mutex + byDevice map[string]*deviceCode + byUser map[string]*deviceCode + tokens map[string]*memberToken // keyed by token hash + now func() time.Time // injectable clock for tests +} + +// New returns an empty Authority. +func New() *Authority { + return &Authority{ + byDevice: map[string]*deviceCode{}, + byUser: map[string]*deviceCode{}, + tokens: map[string]*memberToken{}, + now: time.Now, + } +} + +// RequestCode starts a device login for a worker. workerID is an optional +// self-declared label (e.g. an address or agent name) recorded for the +// owner's approval decision; it is NOT trusted as identity. +func (a *Authority) RequestCode(workerID string) (CodeGrant, error) { + a.mu.Lock() + defer a.mu.Unlock() + + dcVal, err := randomHex(32) + if err != nil { + return CodeGrant{}, err + } + userCode, err := generateUserCode() + if err != nil { + return CodeGrant{}, err + } + // Resolve the rare user-code collision before storing. + for i := 0; i < 5; i++ { + if _, taken := a.byUser[userCode]; !taken { + break + } + if userCode, err = generateUserCode(); err != nil { + return CodeGrant{}, err + } + } + + dc := &deviceCode{ + DeviceCode: dcVal, + UserCode: userCode, + Status: StatusPending, + WorkerID: strings.TrimSpace(workerID), + ExpiresAt: a.now().Add(CodeExpiry), + } + a.byDevice[dcVal] = dc + a.byUser[userCode] = dc + + return CodeGrant{ + DeviceCode: dcVal, + UserCode: userCode, + ExpiresIn: int(CodeExpiry.Seconds()), + Interval: PollInterval, + }, nil +} + +// Approve is the membership decision: the program owner links a pending +// user_code to the group. Only the owner calls this (it is the gate that +// makes the knowledge base private to the group). user_code matching is +// case-insensitive and space-trimmed. 
+func (a *Authority) Approve(groupID, userCode string) error { + a.mu.Lock() + defer a.mu.Unlock() + + dc, ok := a.byUser[normalizeUserCode(userCode)] + if !ok { + return ErrNotFound + } + if a.now().After(dc.ExpiresAt) { + return ErrExpired + } + if dc.Status != StatusPending { + return ErrAlreadyUsed + } + dc.Status = StatusApproved + dc.GroupID = strings.TrimSpace(groupID) + return nil +} + +// Poll returns the authorization state for a device code. On the first +// poll after approval it mints and returns the raw member token exactly +// once; only its SHA-256 hash is retained. +func (a *Authority) Poll(deviceCode string) (PollResult, error) { + a.mu.Lock() + defer a.mu.Unlock() + + dc, ok := a.byDevice[deviceCode] + if !ok { + return PollResult{}, ErrNotFound + } + if a.now().After(dc.ExpiresAt) { + return PollResult{}, ErrExpired + } + + if dc.Status != StatusApproved { + return PollResult{Status: "authorization_pending"}, nil + } + + raw, err := randomHex(32) + if err != nil { + return PollResult{}, err + } + rawToken := tokenPrefix + raw + a.tokens[hashToken(rawToken)] = &memberToken{ + TokenHash: hashToken(rawToken), + GroupID: dc.GroupID, + Label: "device-" + dc.UserCode, + IssuedAt: a.now(), + Active: true, + } + // Consume the code so the token is issued once. + delete(a.byDevice, dc.DeviceCode) + delete(a.byUser, dc.UserCode) + + return PollResult{Status: "authorized", Token: rawToken, GroupID: dc.GroupID}, nil +} + +// VerifyToken reports whether rawToken is an active member token and, if +// so, the group it grants access to. This is what the private knowledge- +// base service calls on each request. +func (a *Authority) VerifyToken(rawToken string) (groupID string, ok bool) { + a.mu.Lock() + defer a.mu.Unlock() + + t, found := a.tokens[hashToken(rawToken)] + if !found || !t.Active { + return "", false + } + return t.GroupID, true +} + +// Revoke deactivates a member token (owner removing a worker from the group). 
+func (a *Authority) Revoke(rawToken string) { + a.mu.Lock() + defer a.mu.Unlock() + if t, found := a.tokens[hashToken(rawToken)]; found { + t.Active = false + } +} + +// HashToken returns the SHA-256 hex hash of a raw member token. Exposed so a +// caller (e.g. a payment-gated service's entitlement map) can key off the +// same hash the Authority stores without ever holding the raw token. +func HashToken(rawToken string) string { return hashToken(rawToken) } + +// Mint issues a member token for groupID WITHOUT the device-auth flow, for +// services where a settled payment — not an owner approval — is the +// membership decision. The raw token is returned exactly once; only its hash +// is retained (returned too, so the caller can persist it). label is a +// non-trusted descriptive tag. +func (a *Authority) Mint(groupID, label string) (rawToken, tokenHash string, err error) { + raw, err := randomHex(32) + if err != nil { + return "", "", err + } + rawToken = tokenPrefix + raw + tokenHash = hashToken(rawToken) + + a.mu.Lock() + defer a.mu.Unlock() + a.tokens[tokenHash] = &memberToken{ + TokenHash: tokenHash, + GroupID: strings.TrimSpace(groupID), + Label: strings.TrimSpace(label), + IssuedAt: a.now(), + Active: true, + } + return rawToken, tokenHash, nil +} + +// RegisterHash re-registers a previously issued token by its hash, marking it +// active for groupID. It lets a persistent store rehydrate the in-memory +// Authority after a restart without ever seeing the raw token (the Authority +// verifies by hash). A blank hash is ignored; otherwise idempotent. 
+func (a *Authority) RegisterHash(tokenHash, groupID, label string) { + tokenHash = strings.TrimSpace(tokenHash) + if tokenHash == "" { + return + } + a.mu.Lock() + defer a.mu.Unlock() + a.tokens[tokenHash] = &memberToken{ + TokenHash: tokenHash, + GroupID: strings.TrimSpace(groupID), + Label: strings.TrimSpace(label), + IssuedAt: a.now(), + Active: true, + } +} + +// --- helpers --- + +func generateUserCode() (string, error) { + code := make([]byte, 8) + for i := range code { + n, err := rand.Int(rand.Reader, big.NewInt(int64(len(userCharset)))) + if err != nil { + return "", err + } + code[i] = userCharset[n.Int64()] + } + return string(code[:4]) + "-" + string(code[4:]), nil +} + +func normalizeUserCode(s string) string { + return strings.ToUpper(strings.TrimSpace(s)) +} + +func randomHex(n int) (string, error) { + b := make([]byte, n) + if _, err := rand.Read(b); err != nil { + return "", err + } + return hex.EncodeToString(b), nil +} + +func hashToken(s string) string { + h := sha256.Sum256([]byte(s)) + return hex.EncodeToString(h[:]) +} diff --git a/internal/research/groupauth/groupauth_test.go b/internal/research/groupauth/groupauth_test.go new file mode 100644 index 00000000..421b1cca --- /dev/null +++ b/internal/research/groupauth/groupauth_test.go @@ -0,0 +1,98 @@ +package groupauth + +import ( + "strings" + "testing" + "time" +) + +func TestDeviceFlow_CodeApproveTokenVerify(t *testing.T) { + a := New() + + grant, err := a.RequestCode("0xworker") + if err != nil { + t.Fatalf("RequestCode: %v", err) + } + if grant.DeviceCode == "" || grant.UserCode == "" { + t.Fatal("empty grant") + } + if grant.Interval != PollInterval || grant.ExpiresIn <= 0 { + t.Errorf("grant metadata = %+v", grant) + } + + // Before approval: pending, no token. 
+ if r, err := a.Poll(grant.DeviceCode); err != nil || r.Status != "authorization_pending" || r.Token != "" { + t.Fatalf("pre-approval poll = %+v err=%v", r, err) + } + + // Owner approves the user_code (case/space-insensitive). + if err := a.Approve("nanogpt-valbpb", " "+strings.ToLower(grant.UserCode)+" "); err != nil { + t.Fatalf("Approve: %v", err) + } + + // First post-approval poll mints the token. + res, err := a.Poll(grant.DeviceCode) + if err != nil { + t.Fatalf("Poll after approve: %v", err) + } + if res.Status != "authorized" || res.GroupID != "nanogpt-valbpb" { + t.Fatalf("poll result = %+v", res) + } + if !strings.HasPrefix(res.Token, tokenPrefix) { + t.Errorf("token = %q, want %s prefix", res.Token, tokenPrefix) + } + + // The token verifies and names the group. + if gid, ok := a.VerifyToken(res.Token); !ok || gid != "nanogpt-valbpb" { + t.Errorf("VerifyToken = %q,%v", gid, ok) + } + + // Token is single-issue: the code is consumed. + if _, err := a.Poll(grant.DeviceCode); err != ErrNotFound { + t.Errorf("second poll err = %v, want ErrNotFound", err) + } + + // Revocation removes access. 
+ a.Revoke(res.Token) + if _, ok := a.VerifyToken(res.Token); ok { + t.Error("revoked token still verifies") + } +} + +func TestApprove_Errors(t *testing.T) { + a := New() + if err := a.Approve("g", "NOPE-NOPE"); err != ErrNotFound { + t.Errorf("unknown code err = %v, want ErrNotFound", err) + } + + grant, _ := a.RequestCode("") + if err := a.Approve("g", grant.UserCode); err != nil { + t.Fatalf("first approve: %v", err) + } + if err := a.Approve("g", grant.UserCode); err != ErrAlreadyUsed { + t.Errorf("double approve err = %v, want ErrAlreadyUsed", err) + } +} + +func TestExpiry(t *testing.T) { + a := New() + base := time.Unix(1_700_000_000, 0) + a.now = func() time.Time { return base } + + grant, _ := a.RequestCode("w") + a.now = func() time.Time { return base.Add(CodeExpiry + time.Second) } + + if err := a.Approve("g", grant.UserCode); err != ErrExpired { + t.Errorf("approve expired err = %v, want ErrExpired", err) + } + if _, err := a.Poll(grant.DeviceCode); err != ErrExpired { + t.Errorf("poll expired err = %v, want ErrExpired", err) + } +} + +func TestVerify_UnknownToken(t *testing.T) { + a := New() + if _, ok := a.VerifyToken("obol-research-mt-deadbeef"); ok { + t.Error("unknown token must not verify") + } +} diff --git a/internal/research/kb/kb.go b/internal/research/kb/kb.go new file mode 100644 index 00000000..adf0690d --- /dev/null +++ b/internal/research/kb/kb.go @@ -0,0 +1,288 @@ +// Package kb is the collective knowledge base for a decentralized +// auto-research program: the workspace AutoScientists agents coordinate +// through (results log, champion, roster), plus the acceptance rule +// (autoresearch's KEEP) and the impact-proportional payout split. +// +// It is pure and concurrency-safe; the HTTP surface (membership gating, +// device-auth) lives in internal/research/server. +package kb + +import ( + "errors" + "math" + "sort" + "sync" + "time" +) + +// Direction is whether lower or higher metric values are better. 
+type Direction string + +const ( + Minimize Direction = "minimize" + Maximize Direction = "maximize" +) + +// AcceptMode is the KEEP rule for a submitted result. +type AcceptMode string + +const ( + // BeatsChampion accepts a result that improves on the current champion + // (or the published Baseline when there is no champion yet). + BeatsChampion AcceptMode = "beats-champion" + // Threshold accepts any result that clears Criteria.Threshold. + Threshold AcceptMode = "threshold" +) + +// SplitMode decides how the reward pool is divided at close. +type SplitMode string + +const ( + // ByImpact divides the pool proportional to each accepted result's + // validated impact (the metric improvement it delivered). + ByImpact SplitMode = "by-impact" + // ChampionTakesAll pays the whole pool to the final champion's worker. + ChampionTakesAll SplitMode = "champion-takes-all" +) + +// Criteria mirrors AutoScientists' TASK.md frontmatter: an arbitrary +// metric name + a direction + the KEEP rule. Nothing domain-specific. +type Criteria struct { + Metric string `json:"metric"` + Direction Direction `json:"direction"` + Accept AcceptMode `json:"accept"` + Threshold *float64 `json:"threshold,omitempty"` +} + +// Program is the published research ID. +type Program struct { + ID string `json:"id"` + Objective string `json:"objective"` + Criteria Criteria `json:"criteria"` + Baseline *float64 `json:"baseline,omitempty"` // reference metric; first improvement's impact is measured against it + Pool float64 `json:"pool"` + Token string `json:"token"` + Network string `json:"network"` + Split SplitMode `json:"split"` +} + +// Result is one submitted experiment outcome. 
+type Result struct { + Seq int `json:"seq"` + Worker string `json:"worker"` + Value float64 `json:"value"` + Output string `json:"output,omitempty"` // raw train.py tail, for audit + At time.Time `json:"at"` + Accepted bool `json:"accepted"` + Impact float64 `json:"impact"` + Champion bool `json:"champion"` // true if this result became the champion +} + +// KB holds one program's collective state. +type KB struct { + mu sync.Mutex + prog Program + results []Result + champion *Result + roster map[string]time.Time + seq int + now func() time.Time +} + +// New returns a KB for a program. +func New(p Program) *KB { + return &KB{prog: p, roster: map[string]time.Time{}, now: time.Now} +} + +// Program returns the published program (immutable copy). +func (k *KB) Program() Program { + k.mu.Lock() + defer k.mu.Unlock() + return k.prog +} + +// Join records a worker in the roster (idempotent). +func (k *KB) Join(worker string) { + k.mu.Lock() + defer k.mu.Unlock() + if _, ok := k.roster[worker]; !ok { + k.roster[worker] = k.now() + } +} + +// Roster returns the joined workers and when they joined. +func (k *KB) Roster() map[string]time.Time { + k.mu.Lock() + defer k.mu.Unlock() + out := make(map[string]time.Time, len(k.roster)) + for w, t := range k.roster { + out[w] = t + } + return out +} + +// Champion returns a copy of the current champion result, or nil. +func (k *KB) Champion() *Result { + k.mu.Lock() + defer k.mu.Unlock() + if k.champion == nil { + return nil + } + c := *k.champion + return &c +} + +// Results returns the full results log in submission order. +func (k *KB) Results() []Result { + k.mu.Lock() + defer k.mu.Unlock() + out := make([]Result, len(k.results)) + copy(out, k.results) + return out +} + +// Submit applies the KEEP rule to a worker's result: it records the result, +// decides acceptance + impact, and promotes the champion if it improved. 
+// Submission is serialized, so the FIRST result to beat the current best +// wins it (first-verified-wins). A worker that is not in the roster is +// auto-joined. +func (k *KB) Submit(worker string, value float64, output string) (Result, error) { + if worker == "" { + return Result{}, errors.New("kb: worker required") + } + if math.IsNaN(value) || math.IsInf(value, 0) { + return Result{}, errors.New("kb: value must be finite") + } + + k.mu.Lock() + defer k.mu.Unlock() + + if _, ok := k.roster[worker]; !ok { + k.roster[worker] = k.now() + } + + k.seq++ + r := Result{Seq: k.seq, Worker: worker, Value: value, Output: output, At: k.now()} + + switch k.prog.Criteria.Accept { + case Threshold: + if k.prog.Criteria.Threshold != nil { + th := *k.prog.Criteria.Threshold + if k.better(value, th) || value == th { + r.Accepted = true + r.Impact = k.gain(th, value) + } + } + // The champion under threshold mode is still the best-so-far. + if r.Accepted && (k.champion == nil || k.better(value, k.champion.Value)) { + r.Champion = true + } + default: // BeatsChampion + ref, hasRef := k.bestRef() + if !hasRef || k.better(value, ref) { + r.Accepted = true + r.Champion = true + if hasRef { + r.Impact = k.gain(ref, value) + } else { + // First result with no Baseline sets the baseline only. + r.Impact = 0 + } + } + } + + if r.Champion { + c := r + k.champion = &c + } + k.results = append(k.results, r) + return r, nil +} + +// Payouts splits the reward pool across workers per the program's SplitMode. +// Returns worker -> amount (rounded to 6 dp). Empty when nothing is owed. +func (k *KB) Payouts() map[string]float64 { + k.mu.Lock() + defer k.mu.Unlock() + + out := map[string]float64{} + if k.prog.Pool <= 0 { + return out + } + + if k.prog.Split == ChampionTakesAll { + if k.champion != nil { + out[k.champion.Worker] = round6(k.prog.Pool) + } + return out + } + + // ByImpact (default): proportional to accepted impact. 
+ total := 0.0 + per := map[string]float64{} + for _, r := range k.results { + if r.Accepted && r.Impact > 0 { + per[r.Worker] += r.Impact + total += r.Impact + } + } + if total <= 0 { + return out + } + for w, imp := range per { + out[w] = round6(k.prog.Pool * imp / total) + } + return out +} + +// --- helpers --- + +// better reports whether a is strictly better than b under the direction. +func (k *KB) better(a, b float64) bool { + if k.prog.Criteria.Direction == Maximize { + return a > b + } + return a < b +} + +// gain is the non-negative improvement of value over ref under the +// program's direction (ref-value when minimizing, value-ref when maximizing). +func (k *KB) gain(ref, value float64) float64 { + var d float64 + if k.prog.Criteria.Direction == Maximize { + d = value - ref + } else { + d = ref - value + } + if d < 0 { + d = 0 + } + return d +} + +// bestRef returns the current reference to beat (champion value, else +// Baseline) and whether one exists. +func (k *KB) bestRef() (float64, bool) { + if k.champion != nil { + return k.champion.Value, true + } + if k.prog.Baseline != nil { + return *k.prog.Baseline, true + } + return 0, false +} + +// Sorted snapshot helper for stable status output. 
+func (k *KB) RosterSorted() []string { + r := k.Roster() + ws := make([]string, 0, len(r)) + for w := range r { + ws = append(ws, w) + } + sort.Strings(ws) + return ws +} + +func round6(f float64) float64 { + return math.Round(f*1e6) / 1e6 +} diff --git a/internal/research/kb/kb_test.go b/internal/research/kb/kb_test.go new file mode 100644 index 00000000..a6985c92 --- /dev/null +++ b/internal/research/kb/kb_test.go @@ -0,0 +1,129 @@ +package kb + +import ( + "math" + "testing" +) + +func f(v float64) *float64 { return &v } + +func minimizeProg(split SplitMode, baseline *float64) Program { + return Program{ + ID: "nanogpt-valbpb", + Criteria: Criteria{Metric: "val_bpb", Direction: Minimize, Accept: BeatsChampion}, + Baseline: baseline, + Pool: 100, + Split: split, + } +} + +func TestSubmit_BeatsChampion_Minimize(t *testing.T) { + k := New(minimizeProg(ByImpact, f(1.20))) // baseline val_bpb 1.20 + + // spark1 improves on baseline → accepted, champion, impact 1.20-1.10. + r1, err := k.Submit("spark1", 1.10, "") + if err != nil { + t.Fatal(err) + } + if !r1.Accepted || !r1.Champion || math.Abs(r1.Impact-0.10) > 1e-9 { + t.Fatalf("r1 = %+v, want accepted champion impact 0.10", r1) + } + + // spark2 improves further → accepted, new champion, impact 1.10-1.05. + r2, _ := k.Submit("spark2", 1.05, "") + if !r2.Accepted || !r2.Champion || math.Abs(r2.Impact-0.05) > 1e-9 { + t.Fatalf("r2 = %+v, want accepted champion impact 0.05", r2) + } + + // spark1 submits a worse value → rejected, not champion, no impact. + r3, _ := k.Submit("spark1", 1.30, "") + if r3.Accepted || r3.Champion || r3.Impact != 0 { + t.Fatalf("r3 = %+v, want rejected", r3) + } + + if c := k.Champion(); c == nil || c.Worker != "spark2" || c.Value != 1.05 { + t.Fatalf("champion = %+v, want spark2 @1.05", c) + } + + // By-impact payout: spark1 0.10, spark2 0.05 → 2:1 of the 100 pool. 
+ pay := k.Payouts() + if math.Abs(pay["spark1"]-66.666667) > 1e-3 || math.Abs(pay["spark2"]-33.333333) > 1e-3 { + t.Fatalf("payouts = %+v, want ~66.67/33.33", pay) + } +} + +func TestSubmit_FirstVerifiedWins(t *testing.T) { + k := New(minimizeProg(ByImpact, f(1.20))) + // Two workers submit the SAME improvement; first one in wins the champion. + a, _ := k.Submit("spark1", 1.10, "") + b, _ := k.Submit("spark2", 1.10, "") + if !a.Champion { + t.Error("first identical submission must take the champion") + } + if b.Accepted || b.Champion { + t.Errorf("second identical submission must not beat the champion: %+v", b) + } +} + +func TestSubmit_FirstResultNoBaselineSetsBaseline(t *testing.T) { + k := New(minimizeProg(ByImpact, nil)) // no baseline + r, _ := k.Submit("spark1", 1.10, "") + if !r.Accepted || !r.Champion || r.Impact != 0 { + t.Fatalf("first result w/o baseline = %+v, want champion impact 0", r) + } + // No positive impact yet → no payouts. + if len(k.Payouts()) != 0 { + t.Errorf("payouts before any improvement = %+v, want empty", k.Payouts()) + } +} + +func TestSubmit_ThresholdMode(t *testing.T) { + p := minimizeProg(ByImpact, nil) + p.Criteria.Accept = Threshold + p.Criteria.Threshold = f(1.00) + k := New(p) + + miss, _ := k.Submit("spark1", 1.05, "") // above threshold → reject + if miss.Accepted { + t.Error("1.05 must miss threshold 1.00 (minimize)") + } + hit, _ := k.Submit("spark2", 0.90, "") // clears threshold → accept, impact 0.10 + if !hit.Accepted || math.Abs(hit.Impact-0.10) > 1e-9 { + t.Fatalf("hit = %+v, want accepted impact 0.10", hit) + } +} + +func TestSubmit_MaximizeDirection(t *testing.T) { + p := minimizeProg(ByImpact, f(0.80)) + p.Criteria.Direction = Maximize + p.Criteria.Metric = "auc" + k := New(p) + r, _ := k.Submit("spark1", 0.90, "") // higher is better → +0.10 + if !r.Accepted || math.Abs(r.Impact-0.10) > 1e-9 { + t.Fatalf("maximize r = %+v, want impact 0.10", r) + } + worse, _ := k.Submit("spark2", 0.85, "") + if worse.Accepted { + 
t.Error("0.85 < champion 0.90 must be rejected under maximize") + } +} + +func TestPayouts_ChampionTakesAll(t *testing.T) { + k := New(minimizeProg(ChampionTakesAll, f(1.20))) + k.Submit("spark1", 1.10, "") + k.Submit("spark2", 1.05, "") + pay := k.Payouts() + if pay["spark2"] != 100 || len(pay) != 1 { + t.Fatalf("champion-takes-all = %+v, want spark2:100", pay) + } +} + +func TestSubmit_RejectsNonFinite(t *testing.T) { + k := New(minimizeProg(ByImpact, nil)) + if _, err := k.Submit("spark1", math.Inf(1), ""); err == nil { + t.Error("Inf value must be rejected") + } + if _, err := k.Submit("", 1.0, ""); err == nil { + t.Error("empty worker must be rejected") + } +} diff --git a/internal/research/server/server.go b/internal/research/server/server.go new file mode 100644 index 00000000..9da23241 --- /dev/null +++ b/internal/research/server/server.go @@ -0,0 +1,244 @@ +// Package server is the owner-hosted HTTP surface for a decentralized +// auto-research program: the device-auth (membership) endpoints plus the +// membership-gated knowledge base. One Server hosts one Program; the owner +// runs it on their machine and exposes it over a Cloudflare tunnel so +// workers on other obol-stacks reach it over the open internet — every KB +// route gated by a groupauth member token, never an open public route. +package server + +import ( + "crypto/subtle" + "encoding/json" + "log/slog" + "net/http" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/research/groupauth" + "github.com/ObolNetwork/obol-stack/internal/research/kb" +) + +// Membership modes. +const ( + MembershipOpen = "open" // any worker is auto-approved on login + MembershipInvite = "invite" // the owner must approve each worker +) + +// Server hosts one program. +type Server struct { + prog kb.Program + membership string + owner string // owner admin token (gates approve/status-admin) + auth *groupauth.Authority + store *kb.KB + log *slog.Logger +} + +// New builds a Server for program p. 
ownerToken gates owner-only routes +// (approve). membership is MembershipOpen or MembershipInvite. +func New(p kb.Program, membership, ownerToken string, log *slog.Logger) *Server { + if log == nil { + log = slog.Default() + } + if membership == "" { + membership = MembershipInvite + } + return &Server{ + prog: p, + membership: membership, + owner: ownerToken, + auth: groupauth.New(), + store: kb.New(p), + log: log, + } +} + +// KB exposes the underlying store (for the CLI's local status/close). +func (s *Server) KB() *kb.KB { return s.store } + +// Approve links a pending user_code to the group (owner action). Exposed +// so the owner CLI can approve locally without a round-trip token. +func (s *Server) Approve(userCode string) error { + return s.auth.Approve(s.prog.ID, userCode) +} + +// Handler returns the HTTP mux. +func (s *Server) Handler() http.Handler { + mux := http.NewServeMux() + mux.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) }) + + // Device-auth (public — the device_code is the secret, RFC 8628). + mux.HandleFunc("POST /auth/device/code", s.handleDeviceCode) + mux.HandleFunc("POST /auth/device/token", s.handleDeviceToken) + // Owner-only approval. + mux.HandleFunc("POST /auth/device/approve", s.ownerOnly(s.handleApprove)) + + // Membership-gated KB. 
+ mux.HandleFunc("GET /task", s.member(s.handleTask)) + mux.HandleFunc("GET /champion", s.member(s.handleChampion)) + mux.HandleFunc("POST /results", s.member(s.handleResults)) + mux.HandleFunc("GET /status", s.member(s.handleStatus)) + return mux +} + +// --- device-auth handlers --- + +func (s *Server) handleDeviceCode(w http.ResponseWriter, r *http.Request) { + var body struct { + Worker string `json:"worker"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + + grant, err := s.auth.RequestCode(body.Worker) + if err != nil { + writeErr(w, http.StatusInternalServerError, "server_error", "failed to create code") + return + } + // Open membership: auto-approve so the worker is admitted without the + // owner acting (the program chose to be public-join). + if s.membership == MembershipOpen { + _ = s.auth.Approve(s.prog.ID, grant.UserCode) + } + writeJSON(w, http.StatusOK, grant) +} + +func (s *Server) handleDeviceToken(w http.ResponseWriter, r *http.Request) { + var body struct { + DeviceCode string `json:"device_code"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.DeviceCode == "" { + writeErr(w, http.StatusBadRequest, "invalid_request", "device_code required") + return + } + res, err := s.auth.Poll(body.DeviceCode) + switch err { + case nil: + // The roster is populated authoritatively on Submit (the token does + // not carry the worker's self-declared id). 
+ writeJSON(w, http.StatusOK, res) + case groupauth.ErrExpired: + writeErr(w, http.StatusGone, "expired_token", "device code expired") + default: + writeErr(w, http.StatusNotFound, "invalid_grant", "device code not found") + } +} + +func (s *Server) handleApprove(w http.ResponseWriter, r *http.Request) { + var body struct { + UserCode string `json:"user_code"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.UserCode == "" { + writeErr(w, http.StatusBadRequest, "invalid_request", "user_code required") + return + } + switch err := s.auth.Approve(s.prog.ID, body.UserCode); err { + case nil: + writeJSON(w, http.StatusOK, map[string]string{"status": "approved"}) + case groupauth.ErrExpired: + writeErr(w, http.StatusGone, "expired_code", "code expired") + case groupauth.ErrAlreadyUsed: + writeErr(w, http.StatusConflict, "already_used", "code already used") + default: + writeErr(w, http.StatusNotFound, "invalid_code", "code not found") + } +} + +// --- KB handlers (member-gated) --- + +func (s *Server) handleTask(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{ + "program": s.store.Program(), + "champion": s.store.Champion(), + }) +} + +func (s *Server) handleChampion(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{"champion": s.store.Champion()}) +} + +func (s *Server) handleResults(w http.ResponseWriter, r *http.Request) { + var body struct { + Worker string `json:"worker"` + Value float64 `json:"value"` + Output string `json:"output"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + writeErr(w, http.StatusBadRequest, "invalid_request", "bad result body") + return + } + res, err := s.store.Submit(body.Worker, body.Value, body.Output) + if err != nil { + writeErr(w, http.StatusBadRequest, "invalid_result", err.Error()) + return + } + s.log.Info("result submitted", + "worker", body.Worker, "value", body.Value, + "accepted", res.Accepted, "champion", 
res.Champion, "impact", res.Impact) + writeJSON(w, http.StatusOK, res) +} + +func (s *Server) handleStatus(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{ + "program": s.store.Program(), + "roster": s.store.RosterSorted(), + "results": s.store.Results(), + "champion": s.store.Champion(), + "payouts": s.store.Payouts(), + }) +} + +// --- middleware --- + +// member gates a handler on a valid member token for THIS program's group. +// The owner admin token is also accepted (the owner is a superuser, so the +// owner CLI can read status without minting itself a member token). +func (s *Server) member(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + tok := bearer(r) + if tok == "" { + writeErr(w, http.StatusUnauthorized, "auth_required", "member token required") + return + } + if s.owner != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(s.owner)) == 1 { + next(w, r) + return + } + gid, ok := s.auth.VerifyToken(tok) + if !ok || gid != s.prog.ID { + writeErr(w, http.StatusForbidden, "not_a_member", "token is not a member of this program") + return + } + next(w, r) + } +} + +// ownerOnly gates a handler on the owner admin token (constant-time). 
+func (s *Server) ownerOnly(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + tok := bearer(r) + if s.owner == "" || subtle.ConstantTimeCompare([]byte(tok), []byte(s.owner)) != 1 { + writeErr(w, http.StatusUnauthorized, "owner_required", "owner token required") + return + } + next(w, r) + } +} + +// --- helpers --- + +func bearer(r *http.Request) string { + h := r.Header.Get("Authorization") + if v, ok := strings.CutPrefix(h, "Bearer "); ok { + return strings.TrimSpace(v) + } + return "" +} + +func writeJSON(w http.ResponseWriter, code int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + _ = json.NewEncoder(w).Encode(v) +} + +func writeErr(w http.ResponseWriter, code int, kind, msg string) { + writeJSON(w, code, map[string]string{"error": kind, "message": msg}) +} diff --git a/internal/research/server/server_test.go b/internal/research/server/server_test.go new file mode 100644 index 00000000..bd7d60aa --- /dev/null +++ b/internal/research/server/server_test.go @@ -0,0 +1,118 @@ +package server + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/research/kb" +) + +func testProgram() kb.Program { + base := 1.20 + return kb.Program{ + ID: "nanogpt-valbpb", + Criteria: kb.Criteria{Metric: "val_bpb", Direction: kb.Minimize, Accept: kb.BeatsChampion}, + Baseline: &base, + Pool: 100, Token: "OBOL", Network: "base-sepolia", Split: kb.ByImpact, + } +} + +func do(t *testing.T, h http.Handler, method, path, token string, body any) (*httptest.ResponseRecorder, map[string]any) { + t.Helper() + var r *http.Request + if body != nil { + b, _ := json.Marshal(body) + r = httptest.NewRequest(method, path, bytes.NewReader(b)) + } else { + r = httptest.NewRequest(method, path, nil) + } + if token != "" { + r.Header.Set("Authorization", "Bearer "+token) + } + w := httptest.NewRecorder() + h.ServeHTTP(w, r) 
+ var out map[string]any + _ = json.Unmarshal(w.Body.Bytes(), &out) + return w, out +} + +func TestServer_InviteFlow_EndToEnd(t *testing.T) { + s := New(testProgram(), MembershipInvite, "owner-secret", nil) + h := s.Handler() + + // KB is gated before membership. + if w, _ := do(t, h, "GET", "/task", "", nil); w.Code != http.StatusUnauthorized { + t.Fatalf("ungated /task = %d, want 401", w.Code) + } + + // Worker requests a device code. + _, code := do(t, h, "POST", "/auth/device/code", "", map[string]string{"worker": "spark1"}) + deviceCode, _ := code["device_code"].(string) + userCode, _ := code["user_code"].(string) + if deviceCode == "" || userCode == "" { + t.Fatalf("device code grant = %+v", code) + } + + // Pre-approval poll is pending. + if _, tok := do(t, h, "POST", "/auth/device/token", "", map[string]string{"device_code": deviceCode}); tok["status"] != "authorization_pending" { + t.Fatalf("pre-approval poll = %+v", tok) + } + + // Approve requires the owner token. + if w, _ := do(t, h, "POST", "/auth/device/approve", "wrong", map[string]string{"user_code": userCode}); w.Code != http.StatusUnauthorized { + t.Fatalf("approve w/o owner token = %d, want 401", w.Code) + } + if w, _ := do(t, h, "POST", "/auth/device/approve", "owner-secret", map[string]string{"user_code": userCode}); w.Code != http.StatusOK { + t.Fatalf("owner approve = %d, want 200", w.Code) + } + + // Worker polls and gets a member token. + _, tok := do(t, h, "POST", "/auth/device/token", "", map[string]string{"device_code": deviceCode}) + if tok["status"] != "authorized" { + t.Fatalf("post-approval poll = %+v", tok) + } + member, _ := tok["token"].(string) + if !strings.HasPrefix(member, "obol-research-mt-") { + t.Fatalf("member token = %q", member) + } + + // Member can read the task. + if w, task := do(t, h, "GET", "/task", member, nil); w.Code != http.StatusOK || task["program"] == nil { + t.Fatalf("member /task = %d %+v", w.Code, task) + } + + // A bogus token is forbidden. 
+ if w, _ := do(t, h, "GET", "/task", "obol-research-mt-bogus", nil); w.Code != http.StatusForbidden { + t.Fatalf("bogus token /task = %d, want 403", w.Code) + } + + // Member submits a result that beats the baseline → accepted champion. + w, res := do(t, h, "POST", "/results", member, map[string]any{"worker": "spark1", "value": 1.10}) + if w.Code != http.StatusOK || res["accepted"] != true || res["champion"] != true { + t.Fatalf("submit = %d %+v", w.Code, res) + } + + // Status reflects roster + champion + payout. + _, st := do(t, h, "GET", "/status", member, nil) + if st["champion"] == nil { + t.Fatalf("status champion missing: %+v", st) + } +} + +func TestServer_OpenMembershipAutoApproves(t *testing.T) { + s := New(testProgram(), MembershipOpen, "owner", nil) + h := s.Handler() + + _, code := do(t, h, "POST", "/auth/device/code", "", map[string]string{"worker": "w"}) + deviceCode := code["device_code"].(string) + + // No owner approve step — open membership auto-approved; first poll mints. + _, tok := do(t, h, "POST", "/auth/device/token", "", map[string]string{"device_code": deviceCode}) + if tok["status"] != "authorized" || tok["token"] == "" { + t.Fatalf("open-membership poll = %+v", tok) + } +} diff --git a/internal/schemas/payment.go b/internal/schemas/payment.go index 740d1eee..21d96a0c 100644 --- a/internal/schemas/payment.go +++ b/internal/schemas/payment.go @@ -29,10 +29,23 @@ var ( approxMinutesPerRequestDecimal = decimal.NewFromInt(ApproxMinutesPerRequest) ) -// PaymentTerms defines x402 payment requirements for a ServiceOffer. -// Field names align with x402 PaymentRequirements (V2). +// PaymentMethodCrypto gates the offer with x402 on-chain stablecoin +// settlement. It is the default when PaymentTerms.Method is empty. +const PaymentMethodCrypto = "crypto" + +// PaymentMethodCard gates the offer with an MPP credit-card method +// (Stripe stripe.charge), settled off-chain into PaymentTerms.Card.Account. 
+const PaymentMethodCard = "card" + +// PaymentTerms defines payment requirements for a ServiceOffer. Field +// names align with x402 PaymentRequirements (V2) for the crypto method; +// the Card block carries the off-chain credit-card (MPP/Stripe) terms. type PaymentTerms struct { - // Scheme is the x402 payment scheme. Default: "exact". + // Method selects the payment method: "crypto" (x402 on-chain, default) + // or "card" (MPP Stripe). Empty is treated as "crypto". + Method string `json:"method,omitempty" yaml:"method,omitempty"` + + // Scheme is the x402 payment scheme. Default: "exact". Crypto only. Scheme string `json:"scheme,omitempty" yaml:"scheme,omitempty"` // Network is the chain identifier (human-friendly, e.g., "base-sepolia"). @@ -47,12 +60,47 @@ type PaymentTerms struct { // Asset defines the token metadata used for x402 settlement. When omitted, // the verifier falls back to the chain default asset (currently USDC). + // Crypto only. Asset AssetTerms `json:"asset,omitempty" yaml:"asset,omitempty"` + // Card holds off-chain credit-card settlement terms when Method=="card". + Card *CardTerms `json:"card,omitempty" yaml:"card,omitempty"` + // Price defines the pricing model (type-specific). Price PriceTable `json:"price" yaml:"price"` } +// EffectiveMethod returns the payment method, defaulting an empty value to +// PaymentMethodCrypto so existing crypto offers keep working unchanged. +func (p PaymentTerms) EffectiveMethod() string { + if p.Method == "" { + return PaymentMethodCrypto + } + return p.Method +} + +// CardTerms defines off-chain credit-card settlement terms used when +// PaymentTerms.Method == "card". It mirrors monetizeapi.ServiceOfferCardPayment. +type CardTerms struct { + // Provider is the card payment provider (only "stripe" today). + Provider string `json:"provider,omitempty" yaml:"provider,omitempty"` + + // Account is the destination account that receives settled card funds. 
+ // For Stripe this is the connected/destination account id (acct_...). + Account string `json:"account,omitempty" yaml:"account,omitempty"` + + // Currency is the ISO-4217 currency the card is charged in (e.g. "usd"). + Currency string `json:"currency,omitempty" yaml:"currency,omitempty"` + + // NetworkID is the optional Stripe "machine payments" network id, + // surfaced in the 402 challenge so MPP card clients can mint a token. + NetworkID string `json:"networkId,omitempty" yaml:"networkId,omitempty"` + + // PaymentMethodTypes are the accepted payment-method types advertised to + // the client. Defaults to ["card"] at the gateway when empty. + PaymentMethodTypes []string `json:"paymentMethodTypes,omitempty" yaml:"paymentMethodTypes,omitempty"` +} + const ( AssetTransferMethodEIP3009 = "eip3009" AssetTransferMethodPermit2 = "permit2" diff --git a/internal/serviceoffercontroller/bounty.go b/internal/serviceoffercontroller/bounty.go new file mode 100644 index 00000000..5c89afc2 --- /dev/null +++ b/internal/serviceoffercontroller/bounty.go @@ -0,0 +1,565 @@ +package serviceoffercontroller + +// ServiceBounty reconcile — the demand-side sibling pass, following the +// RegistrationRequest/PurchaseRequest precedent: one more informer + queue + +// worker on the same Controller, in the same binary. +// +// Lifecycle: Open → Claimed → Submitted → Verified → Paid, with Expired → +// Refunded on deadline and Rejected on a poster verdict. Machine truth is the +// condition set (TaskValid, EscrowReserved, Claimed, Submitted, Verified, +// Paid); status.phase is the human rollup. +// +// Claim/submit/verdict arrive as ANNOTATIONS on the CR (the k8s-native write +// channel for agents/CLI, validated and promoted into controller-owned +// status). 
v1 trust posture is the design doc's v0: escrow via the Gateway +// seam (dev-ledger locally until the facilitator routes ship) and +// poster-as-judge acceptance; the OBOL eval market replaces the poster verdict +// in a later slice. The controller signs NOTHING — see internal/x402/escrow. + +import ( + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "slices" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/cache" +) + +const ( + serviceBountyFinalizer = "obol.org/servicebounty-finalizer" + + // Annotation write-channel (agent/CLI → controller). + bountyClaimAnnotation = "obol.org/claim" // fulfiller payout address (0x…) + bountyCommitAnnotation = "obol.org/commit" // commit hash (anti bait-and-switch) + bountySubmitAnnotation = "obol.org/submit" // JSON {"resultHash":"…","reportURI":"…"} + bountyVerdictAnnotation = "obol.org/verdict" // "accept" or "reject:" + + bountyPhaseInvalid = "Invalid" + bountyPhaseOpen = "Open" + bountyPhaseClaimed = "Claimed" + bountyPhaseSubmitted = "Submitted" + bountyPhaseVerified = "Verified" + bountyPhasePaid = "Paid" + bountyPhaseRejected = "Rejected" + bountyPhaseExpired = "Expired" + bountyPhaseRefunded = "Refunded" +) + +// bountySubmission is the bountySubmitAnnotation payload. 
// bountySubmission is the JSON payload carried by the submission annotation;
// reconcileBounty decodes it in its "Submit" step.
type bountySubmission struct {
	ResultHash string `json:"resultHash"` // echoed into the Submitted condition message
	ReportURI  string `json:"reportURI"`  // copied into status.ReportURI
}

// enqueueBounty derives the namespace/name queue key for obj (tolerating
// deletion tombstones via DeletionHandlingMetaNamespaceKeyFunc) and adds it to
// the bounty work queue. A key-building failure is logged and the object is
// dropped.
func (c *Controller) enqueueBounty(obj any) {
	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
	if err != nil {
		log.Printf("serviceoffer-controller: build bounty queue key: %v", err)
		return
	}
	c.bountyQueue.Add(key)
}

// processNextBounty takes one key off the bounty queue and reconciles it.
// It returns false only when the queue reports shutdown; a reconcile error is
// logged and the key re-added with rate limiting, while a clean pass clears
// the key's rate-limit history via Forget.
func (c *Controller) processNextBounty(ctx context.Context) bool {
	key, shutdown := c.bountyQueue.Get()
	if shutdown {
		return false
	}
	defer c.bountyQueue.Done(key)

	if err := c.reconcileBounty(ctx, key); err != nil {
		log.Printf("serviceoffer-controller: reconcile bounty %s: %v", key, err)
		c.bountyQueue.AddRateLimited(key)
		return true
	}

	c.bountyQueue.Forget(key)
	return true
}

// reconcileBounty runs one reconcile pass for the ServiceBounty named by key
// (namespace/name). It fetches the object through the dynamic client, handles
// deletion and finalizer bookkeeping, then walks the numbered steps below
// (task validity, deadline, escrow reserve, claim, self-bond, submit, eval
// market, verdict, bond settlement, payout) against a working copy of the
// status, which is written back via updateBountyStatus.
//
// A missing object is not an error. A non-nil return makes processNextBounty
// requeue the key with rate limiting; several steps deliberately persist the
// status before returning such an error so the failure is visible on the
// object.
func (c *Controller) reconcileBounty(ctx context.Context, key string) error {
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}

	raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(ctx, name, metav1.GetOptions{})
	if apierrors.IsNotFound(err) {
		return nil
	}
	if err != nil {
		return err
	}

	var sb monetizeapi.ServiceBounty
	if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &sb); err != nil {
		return fmt.Errorf("decode ServiceBounty: %w", err)
	}

	// Deletion: best-effort escrow void (poster keeps funds), then release
	// the finalizer. A captured escrow is final — void refuses, and we still
	// remove the finalizer (the reward was legitimately paid).
	if raw.GetDeletionTimestamp() != nil {
		if !slices.Contains(raw.GetFinalizers(), serviceBountyFinalizer) {
			return nil
		}
		if sb.Status.EscrowState == escrow.StateReserved {
			if _, err := c.escrowGateway().Void(ctx, string(sb.UID)); err != nil {
				log.Printf("serviceoffer-controller: void escrow for deleting bounty %s: %v", key, err)
			}
		}
		if sb.Status.BondState == escrow.StateReserved {
			if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err != nil {
				log.Printf("serviceoffer-controller: void bond for deleting bounty %s: %v", key, err)
			}
		}
		if sb.Status.EvalBudgetState == escrow.StateReserved {
			if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval"); err != nil {
				log.Printf("serviceoffer-controller: void eval budget for deleting bounty %s: %v", key, err)
			}
		}
		if sb.Status.Escalation != nil && sb.Status.Escalation.BudgetState == escrow.StateReserved {
			if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval-r1"); err != nil {
				log.Printf("serviceoffer-controller: void escalation budget for deleting bounty %s: %v", key, err)
			}
		}
		return c.removeBountyFinalizer(ctx, raw)
	}

	// First sight of the object: attach the finalizer and return; nothing
	// else is done in this pass.
	if !slices.Contains(raw.GetFinalizers(), serviceBountyFinalizer) {
		patched := raw.DeepCopy()
		patched.SetFinalizers(append(patched.GetFinalizers(), serviceBountyFinalizer))
		_, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Update(ctx, patched, metav1.UpdateOptions{})
		return err
	}

	// Working copy of the status; every step below mutates this copy and it
	// is persisted through updateBountyStatus.
	status := sb.Status
	status.ObservedGeneration = sb.Generation

	// 1. Task validity — typeRef must resolve against the embedded registry
	// and params must satisfy the package's schema. Invalid bounties park
	// (no requeue): only a spec change can fix them.
	if err := validateBountyTask(&sb); err != nil {
		setPurchaseCondition(&status.Conditions, "TaskValid", "False", "InvalidTask", truncateMessage(err.Error()))
		status.Phase = bountyPhaseInvalid
		return c.updateBountyStatus(ctx, raw, status)
	}
	setPurchaseCondition(&status.Conditions, "TaskValid", "True", "Resolved", fmt.Sprintf("Task type %s resolved", sb.Spec.Task.TypeRef))

	// 2. Deadline — past it with no accepted verdict, the escrow is returned
	// to the poster. Requeue at expiry so the refund happens on time without
	// any spec mutation (the DrainAt requeue discipline).
	if sb.Spec.Deadline != nil && !bountyConditionIsTrue(status.Conditions, "Verified") {
		now := time.Now()
		if now.After(sb.Spec.Deadline.Time) {
			return c.refundBounty(ctx, raw, &sb, status, "DeadlineExpired",
				fmt.Sprintf("Deadline %s passed without an accepted submission", sb.Spec.Deadline.UTC().Format(time.RFC3339)))
		}
		if delay := time.Until(sb.Spec.Deadline.Time) + time.Second; delay > 0 {
			c.bountyQueue.AddAfter(key, delay)
		}
	}

	// 3. Escrow reserve — hold the reward before any claim is admitted, so a
	// fulfiller never starts work against an unfunded bounty. A facilitator
	// that needs the poster's Permit2 voucher answers AwaitingVoucher; the
	// voucher ferries in on the obol.org/reward-voucher annotation and the
	// reserve re-runs (idempotent at the facilitator) until it holds.
	annotations := raw.GetAnnotations()
	if status.EscrowState == "" || status.EscrowState == escrowStateAwaitingVoucher {
		receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{
			ID:      string(sb.UID),
			Network: sb.Spec.Reward.Network,
			PayTo:   sb.Spec.Reward.PayTo,
			Asset:   sb.Spec.Reward.Asset.Symbol,
			Amount:  sb.Spec.Reward.Amount,
			Scheme:  sb.Spec.Reward.Escrow.Scheme,
			Voucher: voucherFromAnnotations(annotations, bountyRewardVoucherAnnotation),
		})
		if err != nil {
			setPurchaseCondition(&status.Conditions, "EscrowReserved", "False", "FacilitatorError", truncateMessage(err.Error()))
			status.Phase = bountyPhaseOpen
			if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil {
				return statusErr
			}
			return err // rate-limited retry
		}
		status.EscrowState = receipt.State
		ferryEscrowSpender(&status, receipt)
	}
	if status.EscrowState == escrowStateAwaitingVoucher {
		setPurchaseCondition(&status.Conditions, "EscrowReserved", "False", "EscrowAwaitingVoucher",
			fmt.Sprintf("Reward hold awaits the poster's Permit2 voucher (%s annotation)", bountyRewardVoucherAnnotation))
	} else {
		setPurchaseCondition(&status.Conditions, "EscrowReserved", "True", "Reserved", escrowReason(c.escrowGateway()))
	}

	// 4. Claim — promote the claim annotation into controller-owned status.
	// Only the first claim is promoted: the annotation is ignored once
	// status.Claims is non-empty.
	if claim := strings.TrimSpace(annotations[bountyClaimAnnotation]); claim != "" && len(status.Claims) == 0 {
		if !common.IsHexAddress(claim) {
			setPurchaseCondition(&status.Conditions, "Claimed", "False", "InvalidAddress",
				fmt.Sprintf("claim annotation %q is not a hex address", claim))
			status.Phase = bountyPhaseOpen
			return c.updateBountyStatus(ctx, raw, status)
		}
		now := metav1.Now()
		status.Claims = []monetizeapi.ServiceBountyClaim{{
			FulfillerAddress: common.HexToAddress(claim).Hex(),
			ClaimedAt:        &now,
			CommitHash:       strings.TrimSpace(annotations[bountyCommitAnnotation]),
			Phase:            bountyPhaseClaimed,
		}}
	}
	if len(status.Claims) > 0 {
		setPurchaseCondition(&status.Conditions, "Claimed", "True", "Claimed",
			fmt.Sprintf("Claimed by %s", status.Claims[0].FulfillerAddress))
		// Late commit: the commit annotation may land after the claim.
		if commit := strings.TrimSpace(annotations[bountyCommitAnnotation]); commit != "" && status.Claims[0].CommitHash == "" {
			status.Claims[0].CommitHash = commit
		}
	} else {
		setPurchaseCondition(&status.Conditions, "Claimed", "False", "Open", "No fulfiller has claimed this bounty")
	}

	// 4b. Self-bond — held at the escrow gateway against the fulfiller's own
	// funds at claim time (anti-griefing: returned on success or honest
	// timeout, forfeited on rejected work to offset the poster's eval spend).
	if sb.Spec.Trust.SelfBond.Required && len(status.Claims) > 0 &&
		(status.BondState == "" || status.BondState == escrowStateAwaitingVoucher) {
		receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{
			ID:      string(sb.UID) + "-bond",
			Network: sb.Spec.Reward.Network,
			PayTo:   status.Claims[0].FulfillerAddress,
			Asset:   sb.Spec.Trust.SelfBond.Token,
			Amount:  sb.Spec.Trust.SelfBond.Amount,
			Scheme:  sb.Spec.Reward.Escrow.Scheme,
			Voucher: voucherFromAnnotations(annotations, bountyBondVoucherAnnotation),
		})
		if err != nil {
			// Persist what has been computed so far (e.g. the promoted claim)
			// before surfacing the error for a retry.
			if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil {
				return statusErr
			}
			return err // rate-limited retry
		}
		status.BondState = receipt.State
		ferryEscrowSpender(&status, receipt)
	}

	// 5. Submit — parse the submission annotation, advance the claim.
	if subRaw := strings.TrimSpace(annotations[bountySubmitAnnotation]); subRaw != "" && len(status.Claims) > 0 {
		var sub bountySubmission
		if err := json.Unmarshal([]byte(subRaw), &sub); err != nil {
			setPurchaseCondition(&status.Conditions, "Submitted", "False", "InvalidSubmission", truncateMessage(err.Error()))
		} else {
			if status.Claims[0].Phase == bountyPhaseClaimed {
				status.Claims[0].Phase = bountyPhaseSubmitted
			}
			status.ReportURI = sub.ReportURI
			setPurchaseCondition(&status.Conditions, "Submitted", "True", "Submitted",
				fmt.Sprintf("Result hash %s", sub.ResultHash))
		}
	} else if !bountyConditionIsTrue(status.Conditions, "Submitted") {
		setPurchaseCondition(&status.Conditions, "Submitted", "False", "AwaitingSubmission", "No submission yet")
	}

	// 5b. Eval market — verification-by-default: once a submission exists and
	// the bounty is not dangerously skipped (nor poster-manual), the
	// commit-reveal quorum drives Verified (reason=EvaluatorQuorum). The
	// poster verdict annotation below still overrides either way.
	if evalMarketActive(&sb) && bountyConditionIsTrue(status.Conditions, "Submitted") {
		if requeue := c.reconcileEvalMarket(ctx, &sb, annotations, &status, time.Now()); requeue > 0 {
			c.bountyQueue.AddAfter(key, requeue)
		}
	}

	// 6. Verdict — the poster verdict annotation. With the eval market active
	// it is an explicit OVERRIDE on top of (or instead of) the quorum; for
	// poster-manual or dangerously-skipped bounties it is the designed path.
	verdict := strings.TrimSpace(annotations[bountyVerdictAnnotation])
	// quorumSpoke: the Verified condition currently carries the quorum's
	// reason, so the "awaiting" cases below must not overwrite it.
	quorumSpoke := conditionReason(status.Conditions, "Verified") == "EvaluatorQuorum"
	switch {
	case verdict == "accept" && bountyConditionIsTrue(status.Conditions, "Submitted"):
		reason := "PosterAccepted"
		if sb.Spec.Acceptance.Method != "poster-manual" && !bountyConditionIsTrue(status.Conditions, "Verified") {
			reason = "PosterOverride"
		}
		if !bountyConditionIsTrue(status.Conditions, "Verified") {
			setPurchaseCondition(&status.Conditions, "Verified", "True", reason, "Submission accepted by poster")
			status.WeightedScore = 100
		}
		if len(status.Claims) > 0 {
			status.Claims[0].Phase = bountyPhaseVerified
		}
	case strings.HasPrefix(verdict, "reject"):
		// Anything after "reject" (minus one leading ':') becomes the
		// rejection message.
		reason := strings.TrimPrefix(strings.TrimPrefix(verdict, "reject"), ":")
		if reason == "" {
			reason = "rejected by poster"
		}
		setPurchaseCondition(&status.Conditions, "Verified", "False", "PosterRejected", truncateMessage(reason))
		if len(status.Claims) > 0 {
			status.Claims[0].Phase = bountyPhaseRejected
		}
	case bountyConditionIsTrue(status.Conditions, "Submitted") && !bountyConditionIsTrue(status.Conditions, "Verified") && !quorumSpoke:
		setPurchaseCondition(&status.Conditions, "Verified", "False", "AwaitingVerdict",
			awaitingVerdictMessage(sb.Spec.Acceptance.Method, sb.Spec.Eval.Mode))
	case !bountyConditionIsTrue(status.Conditions, "Verified") && !quorumSpoke:
		setPurchaseCondition(&status.Conditions, "Verified", "False", "AwaitingSubmission", "No submission to verify")
	}

	// 6b. Self-bond settlement: returned on an accepted verdict, forfeited on
	// rejected work (poster or quorum). Deadline expiry returns it (honest
	// timeout) via refundBounty.
	// NOTE(review): a failed Void/Capture here is dropped without a log line;
	// BondState stays Reserved, so this branch runs again on a later pass.
	if status.BondState == escrow.StateReserved {
		switch {
		case bountyConditionIsTrue(status.Conditions, "Verified"):
			if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err == nil {
				status.BondState = "Returned"
			}
		case len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseRejected:
			if _, err := c.escrowGateway().Capture(ctx, string(sb.UID)+"-bond"); err == nil {
				status.BondState = "Forfeited"
			}
		}
	}

	// 7. Payout — Verified + a held escrow → capture to the fulfiller.
	if bountyConditionIsTrue(status.Conditions, "Verified") && status.EscrowState == escrow.StateReserved {
		receipt, err := c.escrowGateway().Capture(ctx, string(sb.UID))
		if err != nil {
			if isEscrowVoucherRefusal(err) {
				// The facilitator wants a (fresh) Permit2 voucher before it
				// settles — a poster-side signing gap, not a controller
				// failure. Park as a condition + requeue; never fail the loop.
				setPurchaseCondition(&status.Conditions, "Paid", "False", "EscrowAwaitingVoucher", truncateMessage(err.Error()))
				c.bountyQueue.AddAfter(key, 30*time.Second)
				status.Phase = bountyPhaseRollup(status)
				return c.updateBountyStatus(ctx, raw, status)
			}
			setPurchaseCondition(&status.Conditions, "Paid", "False", "CaptureFailed", truncateMessage(err.Error()))
			if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil {
				return statusErr
			}
			return err // verified-but-unpaid is a retryable, worker-protecting state
		}
		status.EscrowState = receipt.State
		status.CaptureTxHash = receipt.TxHash
		ferryEscrowSpender(&status, receipt)
	}
	if status.EscrowState == escrow.StateCaptured {
		setPurchaseCondition(&status.Conditions, "Paid", "True", "Captured", "Reward released to fulfiller")
		if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseVerified {
			status.Claims[0].Phase = bountyPhasePaid
		}
	} else if !bountyConditionIsTrue(status.Conditions, "Paid") {
		setPurchaseCondition(&status.Conditions, "Paid", "False", "AwaitingVerification", "Escrow capture follows an accepted verdict")
	}

	status.Phase = bountyPhaseRollup(status)
	return c.updateBountyStatus(ctx, raw, status)
}

// refundBounty voids the escrow and parks the bounty in Expired/Refunded.
// A held self-bond is returned — deadline expiry is an honest timeout, not
// rejected work.
+func (c *Controller) refundBounty(ctx context.Context, raw *unstructured.Unstructured, sb *monetizeapi.ServiceBounty, status monetizeapi.ServiceBountyStatus, reason, message string) error { + if status.BondState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-bond"); err == nil { + status.BondState = "Returned" + } + } + if status.EvalBudgetState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval"); err == nil { + status.EvalBudgetState = escrow.StateVoided + } + } + if status.Escalation != nil && status.Escalation.BudgetState == escrow.StateReserved { + if _, err := c.escrowGateway().Void(ctx, string(sb.UID)+"-eval-r1"); err == nil { + status.Escalation.BudgetState = escrow.StateVoided + } + } + if status.EscrowState == escrow.StateReserved { + receipt, err := c.escrowGateway().Void(ctx, string(sb.UID)) + if err != nil { + setPurchaseCondition(&status.Conditions, "Paid", "False", "RefundFailed", truncateMessage(err.Error())) + if statusErr := c.updateBountyStatus(ctx, raw, status); statusErr != nil { + return statusErr + } + return err + } + status.EscrowState = receipt.State + status.RefundTxHash = receipt.TxHash + } + setPurchaseCondition(&status.Conditions, "Verified", "False", reason, message) + setPurchaseCondition(&status.Conditions, "Paid", "False", reason, "Escrow returned to poster") + status.Phase = bountyPhaseRefunded + if status.EscrowState == "" { + status.Phase = bountyPhaseExpired + } + return c.updateBountyStatus(ctx, raw, status) +} + +// bountyPhaseRollup derives the human phase from the condition machine truth. 
+func bountyPhaseRollup(status monetizeapi.ServiceBountyStatus) string { + conditions := status.Conditions + claimRejected := len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseRejected + switch { + case bountyConditionIsTrue(conditions, "Paid"): + return bountyPhasePaid + case bountyConditionIsTrue(conditions, "Verified"): + return bountyPhaseVerified + case conditionReason(conditions, "Verified") == "PosterRejected" || claimRejected: + return bountyPhaseRejected + case bountyConditionIsTrue(conditions, "Submitted"): + return bountyPhaseSubmitted + case bountyConditionIsTrue(conditions, "Claimed"): + return bountyPhaseClaimed + default: + return bountyPhaseOpen + } +} + +// validateBountyTask resolves spec.task.typeRef against the embedded registry +// and validates params + the reward envelope needed to construct the escrow. +// Admission is strict: a gate that silently accepts what it doesn't understand +// is not a gate (unknown params are typo'd intent, not extensibility). +func validateBountyTask(sb *monetizeapi.ServiceBounty) error { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return err + } + + known := make(map[string]bool, len(t.Params)) + for _, p := range t.Params { + known[p.Name] = true + } + for name := range sb.Spec.Task.Params { + if !known[name] { + return fmt.Errorf("unknown param %q for task type %s", name, t.Ref()) + } + } + + for _, p := range t.Params { + v := sb.Spec.Task.Params[p.Name] + if p.Required && strings.TrimSpace(v) == "" { + return fmt.Errorf("param %s is required for task type %s", p.Name, t.Ref()) + } + if v == "" { + continue + } + if len(p.Enum) > 0 && !slices.Contains(p.Enum, v) { + return fmt.Errorf("param %s=%q is not one of [%s]", p.Name, v, strings.Join(p.Enum, ", ")) + } + } + + // Single-winner guard: the controller admits one claim at a time. Honoring + // >1 silently would promise a race/split semantic that does not exist yet. 
+ if sb.Spec.MaxFulfillers > 1 { + return fmt.Errorf("maxFulfillers=%d is not supported yet — v1 bounties are single-winner", sb.Spec.MaxFulfillers) + } + + if strings.TrimSpace(sb.Spec.Reward.Amount) == "" { + return fmt.Errorf("reward.amount is required") + } + if strings.TrimSpace(sb.Spec.Reward.Network) == "" { + return fmt.Errorf("reward.network is required to construct the escrow authorization") + } + + return nil +} + +func awaitingVerdictMessage(method, evalMode string) string { + if method == "poster-manual" || evalMode == monetizeapi.EvalModeDangerouslySkipped { + return "Awaiting poster verdict — accept with `obol bounty accept `" + } + return fmt.Sprintf("Eval market for %s is not wired yet; poster may override with `obol bounty accept `", method) +} + +func bountyConditionIsTrue(conditions []monetizeapi.Condition, conditionType string) bool { + for _, condition := range conditions { + if condition.Type == conditionType { + return condition.Status == "True" + } + } + return false +} + +func conditionReason(conditions []monetizeapi.Condition, conditionType string) string { + for _, condition := range conditions { + if condition.Type == conditionType { + return condition.Reason + } + } + return "" +} + +// newBountyEscrowGateway selects the escrow backend from controller-level +// configuration, NOT from spec.reward.escrow.facilitator: the gateway carries +// the controller's release-authority bearer token, and honoring an arbitrary +// per-bounty URL would let a poster exfiltrate that token to a server they +// control. The spec field stays advisory/documentary. 
+func newBountyEscrowGateway() escrow.Gateway { + if base := strings.TrimSpace(os.Getenv("OBOL_BOUNTY_ESCROW_URL")); base != "" { + return &escrow.HTTPGateway{ + Base: base, + Token: strings.TrimSpace(os.Getenv("OBOL_BOUNTY_ESCROW_TOKEN")), + Client: &http.Client{Timeout: 10 * time.Second}, + } + } + return escrow.NewLedgerGateway() +} + +// defaultBountyLedger backs Controllers constructed without an explicit +// gateway (struct-literal tests); New() always sets bountyEscrow. +var defaultBountyLedger = escrow.NewLedgerGateway() + +// escrowGateway returns the configured gateway, defaulting to the dev ledger. +// The dev ledger is escrow theater for local-first stacks — receipts are +// labeled dev-ledger and the EscrowReserved reason says so. +func (c *Controller) escrowGateway() escrow.Gateway { + if c.bountyEscrow != nil { + return c.bountyEscrow + } + return defaultBountyLedger +} + +func escrowReason(g escrow.Gateway) string { + if _, ok := g.(*escrow.LedgerGateway); ok { + return "Reward hold recorded in dev ledger (no funds held — local dev mode)" + } + return "Reward authorization held at facilitator" +} + +func (c *Controller) removeBountyFinalizer(ctx context.Context, raw *unstructured.Unstructured) error { + patched := raw.DeepCopy() + patched.SetFinalizers(slices.DeleteFunc(patched.GetFinalizers(), func(s string) bool { return s == serviceBountyFinalizer })) + _, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(patched.GetNamespace()).Update(ctx, patched, metav1.UpdateOptions{}) + return err +} + +func (c *Controller) updateBountyStatus(ctx context.Context, raw *unstructured.Unstructured, status monetizeapi.ServiceBountyStatus) error { + patched := raw.DeepCopy() + statusObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&status) + if err != nil { + return err + } + if existing, found := patched.Object["status"]; found && equality.Semantic.DeepEqual(existing, statusObject) { + return nil + } + patched.Object["status"] 
= statusObject + _, err = c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(patched.GetNamespace()).UpdateStatus(ctx, patched, metav1.UpdateOptions{}) + return err +} diff --git a/internal/serviceoffercontroller/bounty_escalation.go b/internal/serviceoffercontroller/bounty_escalation.go new file mode 100644 index 00000000..9863cfc5 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_escalation.go @@ -0,0 +1,300 @@ +package serviceoffercontroller + +// Escalation round (design doc §11.6): when round 0 settles diverged +// (dispersion) or knife-edge on the pass threshold, the verdict is NOT spoken. +// Instead a fresh 2k+1 panel — excluding the round-0 panel and the fulfiller — +// re-runs the same commit-reveal cycle on annotation prefixes +// obol.org/eval-commit-r1- / obol.org/eval-reveal-r1-, and ITS +// median is final. One escalation per bounty (status.escalation is a latch); +// the round-1 eval budget is a separate poster-funded escrow leg +// (-eval-r1, voucher annotation obol.org/eval-voucher-r1) that pays every +// round-1 evaluator full price, win-or-lose. If the voucher never arrives +// before the escalation window closes, the escalation is Unfunded and the +// round-0 median stands — evaluators are never asked to work unpaid. + +import ( + "context" + "fmt" + "log" + "strconv" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + bountyEvalCommitR1Prefix = "obol.org/eval-commit-r1-" + bountyEvalRevealR1Prefix = "obol.org/eval-reveal-r1-" + + // Fallbacks mirror internal/bounty registry ladder defaults for task + // packages that cannot be resolved. 
+ defaultEscalationWindow = 30 * time.Minute + defaultEscalationEpsilon = 5 +) + +// selectEscalationPanelFn is the seam to the escalation panel selection +// implemented in bounty_panel.go. A variable so tests inject a deterministic +// fake panel (the selection itself is exercised by the panel tests). +var selectEscalationPanelFn = (*Controller).selectEscalationPanel + +// escalationTrigger reports why round 0 must escalate ("" = settle normally): +// +// (a) dispersion — at least ⌈k/2⌉ counting REVEALS landed out of band +// around the median (non-reveals are penalized, not dispersion); +// (b) knife-edge — the median sits within epsilon of the pass threshold, +// where a single re-rolled evaluator could have flipped the verdict. +// epsilon <= 0 disables the knife-edge trigger. +func escalationTrigger(evaluations []monetizeapi.ServiceBountyEvaluation, k, median int64, epsilon int) string { + outOfBand := int64(0) + for _, evaluation := range evaluations { + if evaluation.Phase == evalPhaseRevealed && evaluation.Seat != monetizeapi.PanelSeatShadow && !evaluation.WithinBand { + outOfBand++ + } + } + if outOfBand >= (k+1)/2 { + return fmt.Sprintf("dispersion: %d of %d counting reveal(s) out of band around median %d", outOfBand, k, median) + } + if epsilon > 0 { + diff := median - evalPassThreshold + if diff < 0 { + diff = -diff + } + if diff <= int64(epsilon) { + return fmt.Sprintf("knife-edge: median %d within %d of the %d pass threshold", median, epsilon, evalPassThreshold) + } + } + return "" +} + +// escalationWindow resolves the task package's ladder.escalationWindow — the +// time the poster has to fund the round-1 eval budget (voucher arrival). 
+func escalationWindow(sb *monetizeapi.ServiceBounty) time.Duration { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return defaultEscalationWindow + } + window, err := time.ParseDuration(t.Eval.Ladder.EscalationWindow) + if err != nil || window <= 0 { + return defaultEscalationWindow + } + return window +} + +// escalationEpsilon resolves the task package's ladder.escalationEpsilon. +func escalationEpsilon(sb *monetizeapi.ServiceBounty) int { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return defaultEscalationEpsilon + } + return t.Eval.Ladder.EscalationEpsilon +} + +// openEscalation latches the single escalation round: a 2k+1 panel excluding +// the round-0 panel, the round-0 (open-door) participants, and the fulfiller. +// opened=false, retry=true is a transient selection failure (seed source / +// enrollment list) — the verdict stays unspoken and the trigger re-checks; +// opened=false, retry=false means the enrolled pool cannot seat a round-1 +// panel at all, so the round-0 median stands. +func (c *Controller) openEscalation(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus, reason string, now time.Time) (opened, retry bool) { + size := int(2*evalQuorumK(sb) + 1) + + exclude := make(map[string]bool) + for _, seat := range status.EvaluatorPanel { + exclude[common.HexToAddress(seat.Address).Hex()] = true + } + for _, evaluation := range status.Evaluations { + // Open-door round-0 participants are excluded too: a diverged + // evaluator must not grade their own divergence. 
+ exclude[common.HexToAddress(evaluation.Address).Hex()] = true + } + if len(status.Claims) > 0 && common.IsHexAddress(status.Claims[0].FulfillerAddress) { + exclude[common.HexToAddress(status.Claims[0].FulfillerAddress).Hex()] = true + } + + // Panel selection reads the raw object shape (spec.task.typeRef, UID, + // creation timestamp, status.claims) — feed it the WORKING status so a + // claim promoted this reconcile is visible to the pair-diversity weights. + working := *sb + working.Status = *status + rawObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&working) + if err != nil { + setPurchaseCondition(&status.Conditions, "Escalated", "False", "PanelUnavailable", + truncateMessage(fmt.Sprintf("escalation triggered (%s) but the bounty could not be encoded for panel selection: %v", reason, err))) + return false, true + } + panel, err := selectEscalationPanelFn(c, ctx, &unstructured.Unstructured{Object: rawObject}, size, exclude) + if err != nil { + setPurchaseCondition(&status.Conditions, "Escalated", "False", "PanelUnavailable", + truncateMessage(fmt.Sprintf("escalation triggered (%s) but no round-1 panel is available: %v", reason, err))) + return false, true + } + if len(panel) == 0 { + // Thin enrolled pool — same fallback posture as round 0's open door, + // except a round-1 open door would re-admit the very addresses the + // escalation excludes, so the round-0 median stands instead. 
+ setPurchaseCondition(&status.Conditions, "Escalated", "False", "PanelExhausted", + truncateMessage(fmt.Sprintf("escalation triggered (%s) but the enrolled pool cannot seat a %d-member round-1 panel — the round-0 median stands", reason, size))) + return false, false + } + + voucherDeadline := metav1.NewTime(now.Add(escalationWindow(sb))) + status.Escalation = &monetizeapi.ServiceBountyEscalation{ + Round: 1, + Reason: reason, + Panel: panel, + VoucherDeadline: &voucherDeadline, + BudgetState: escrowStateAwaitingVoucher, + } + setPurchaseCondition(&status.Conditions, "Escalated", "True", "Escalated", truncateMessage(reason)) + c.reserveEscalationBudget(ctx, sb, annotations, status) + return true, false +} + +// runEscalation drives the open escalation to a conclusion. done=true means +// the escalation is resolved: either the round-1 cycle settled (its median is +// final) or the budget went Unfunded (round-0 median stands). done=false keeps +// the verdict unspoken; requeue covers the voucher/reveal deadlines. +func (c *Controller) runEscalation(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus, now time.Time) (done bool, requeue time.Duration) { + esc := status.Escalation + + if esc.BudgetState == "" || esc.BudgetState == escrowStateAwaitingVoucher { + c.reserveEscalationBudget(ctx, sb, annotations, status) + } + + switch esc.BudgetState { + case escrowStateUnfunded: + return true, 0 + case escrow.StateReserved, escrow.StateCaptured: + // funded — fall through to the round-1 cycle below + default: + // Still awaiting the voucher: past the escalation window the round-0 + // median stands; before it, wait (annotation arrival re-reconciles). 
+ if esc.VoucherDeadline != nil && now.After(esc.VoucherDeadline.Time) { + esc.BudgetState = escrowStateUnfunded + setPurchaseCondition(&status.Conditions, "Escalated", "True", "EscalationUnfunded", + fmt.Sprintf("Escalation eval budget was never funded before %s — the round-0 median stands", esc.VoucherDeadline.UTC().Format(time.RFC3339))) + return true, 0 + } + setPurchaseCondition(&status.Conditions, "Escalated", "True", "EscrowAwaitingVoucher", + fmt.Sprintf("Escalation eval budget awaits the poster's Permit2 voucher (%s annotation)", bountyEvalVoucherR1Annotation)) + if esc.VoucherDeadline != nil { + return false, time.Until(esc.VoucherDeadline.Time) + time.Second + } + return false, 0 + } + + // Funded: full commit-reveal cycle, semantics identical to round 0. All + // 2k+1 seats are counting (no probation/shadow in round 1) and only panel + // members are admitted. + seats := make(map[string]string, len(esc.Panel)) + for _, seat := range esc.Panel { + seats[common.HexToAddress(seat.Address).Hex()] = seat.Seat + } + settled, roundRequeue := runEvalRound(annotations, evalRoundIO{ + commitPrefix: bountyEvalCommitR1Prefix, + revealPrefix: bountyEvalRevealR1Prefix, + seats: seats, + restrict: true, + k: int64(len(esc.Panel)), + window: revealWindow(sb), + evaluations: &esc.Evaluations, + deadline: &esc.RevealDeadline, + }, now) + return settled, roundRequeue +} + +// reserveEscalationBudget holds the round-1 eval budget — panel size × FULL +// perEvaluator, no probation discount — under -eval-r1, attaching the +// obol.org/eval-voucher-r1 Permit2 voucher when it has ferried in. Re-runs +// while AwaitingVoucher (idempotent at the facilitator). 
+func (c *Controller) reserveEscalationBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus) { + esc := status.Escalation + if esc == nil || (esc.BudgetState != "" && esc.BudgetState != escrowStateAwaitingVoucher) { + return + } + per, err := strconv.ParseFloat(strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator), 64) + if err != nil || per <= 0 { + // No eval-payment leg configured → nothing to fund; the round runs + // like a round-0 market without perEvaluator pricing (settle is a + // no-op for the same reason). + esc.BudgetState = escrow.StateReserved + return + } + total := strconv.FormatFloat(float64(len(esc.Panel))*per, 'f', 2, 64) + receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{ + ID: string(sb.UID) + "-eval-r1", + Network: sb.Spec.Reward.Network, + PayTo: sb.Spec.Reward.PayTo, // poster refund address + Asset: sb.Spec.Eval.Payment.Asset, + Amount: total, + Scheme: sb.Spec.Reward.Escrow.Scheme, + Voucher: voucherFromAnnotations(annotations, bountyEvalVoucherR1Annotation), + }) + if err != nil { + log.Printf("serviceoffer-controller: reserve escalation budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + esc.BudgetState = receipt.State + ferryEscrowSpender(status, receipt) + if receipt.State == escrow.StateReserved { + setPurchaseCondition(&status.Conditions, "Escalated", "True", "EscalationFunded", + fmt.Sprintf("Round-1 panel of %d funded (%s); commit-reveal in progress", len(esc.Panel), total)) + } +} + +// settleEscalationBudget batch-settles the round-1 eval budget to every +// round-1 evaluator with a valid reveal — full price, win-or-lose. Non/bad +// reveals earn nothing, exactly like round 0. 
+func (c *Controller) settleEscalationBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + esc := status.Escalation + if esc == nil || esc.BudgetState != escrow.StateReserved { + return + } + // Full price per round-1 seat, in atomic units when the asset resolves — + // capture recipients must match the poster's voucher seats exactly + // (see evalSeatAmounts). + amount, _, ok := evalSeatAmounts(sb) + if !ok { + return + } + + var recipients []escrow.BatchRecipient + var paidIdx []int + for i := range esc.Evaluations { + if esc.Evaluations[i].Phase != evalPhaseRevealed { + continue + } + recipients = append(recipients, escrow.BatchRecipient{ + Address: esc.Evaluations[i].Address, + Amount: amount, + }) + paidIdx = append(paidIdx, i) + } + if len(recipients) == 0 { + return // nothing to pay; refund path voids the budget + } + + var receipt escrow.Receipt + var err error + if batch, ok := c.escrowGateway().(escrow.BatchGateway); ok { + receipt, err = batch.CaptureBatch(ctx, string(sb.UID)+"-eval-r1", recipients) + } else { + receipt, err = c.escrowGateway().Capture(ctx, string(sb.UID)+"-eval-r1") + } + if err != nil { + log.Printf("serviceoffer-controller: settle escalation budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + esc.BudgetState = receipt.State + ferryEscrowSpender(status, receipt) + for _, i := range paidIdx { + esc.Evaluations[i].Paid = true + } +} diff --git a/internal/serviceoffercontroller/bounty_escalation_test.go b/internal/serviceoffercontroller/bounty_escalation_test.go new file mode 100644 index 00000000..88f9f421 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_escalation_test.go @@ -0,0 +1,785 @@ +package serviceoffercontroller + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + "testing" + "time" + + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + 
"github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +// ── fakes ─────────────────────────────────────────────────────────────────── + +// fakeEscrowGateway is a voucher-aware escrow fake: ids listed in +// requireVoucher answer AwaitingVoucher until a Reserve carries a voucher, +// captures can be forced to fail, and every request/batch is recorded. +type fakeEscrowGateway struct { + mu sync.Mutex + spender string + requireVoucher map[string]bool + captureErr map[string]error + reserves map[string][]escrow.ReserveRequest + states map[string]string + batches map[string][]escrow.BatchRecipient +} + +func newFakeEscrow() *fakeEscrowGateway { + return &fakeEscrowGateway{ + requireVoucher: map[string]bool{}, + captureErr: map[string]error{}, + reserves: map[string][]escrow.ReserveRequest{}, + states: map[string]string{}, + batches: map[string][]escrow.BatchRecipient{}, + } +} + +func (f *fakeEscrowGateway) Reserve(_ context.Context, req escrow.ReserveRequest) (escrow.Receipt, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.reserves[req.ID] = append(f.reserves[req.ID], req) + if state := f.states[req.ID]; state == escrow.StateCaptured || state == escrow.StateVoided { + return escrow.Receipt{State: state, Spender: f.spender}, nil + } + if f.requireVoucher[req.ID] && req.Voucher == nil { + f.states[req.ID] = escrowStateAwaitingVoucher + return escrow.Receipt{State: escrowStateAwaitingVoucher, Spender: f.spender}, nil + } + f.states[req.ID] = escrow.StateReserved + return escrow.Receipt{State: escrow.StateReserved, Spender: f.spender}, nil +} + +func (f *fakeEscrowGateway) capture(id string) (escrow.Receipt, error) { + if err := f.captureErr[id]; err != nil { + return escrow.Receipt{}, err + } + f.states[id] = escrow.StateCaptured + return escrow.Receipt{State: escrow.StateCaptured, TxHash: "fake-capture:" + id, Spender: f.spender}, nil +} + +func (f 
*fakeEscrowGateway) Capture(_ context.Context, id string) (escrow.Receipt, error) { + f.mu.Lock() + defer f.mu.Unlock() + return f.capture(id) +} + +func (f *fakeEscrowGateway) CaptureBatch(_ context.Context, id string, recipients []escrow.BatchRecipient) (escrow.Receipt, error) { + f.mu.Lock() + defer f.mu.Unlock() + receipt, err := f.capture(id) + if err != nil { + return escrow.Receipt{}, err + } + f.batches[id] = recipients + return receipt, nil +} + +func (f *fakeEscrowGateway) Void(_ context.Context, id string) (escrow.Receipt, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.states[id] = escrow.StateVoided + return escrow.Receipt{State: escrow.StateVoided, Spender: f.spender}, nil +} + +func (f *fakeEscrowGateway) lastReserve(t *testing.T, id string) escrow.ReserveRequest { + t.Helper() + f.mu.Lock() + defer f.mu.Unlock() + reqs := f.reserves[id] + if len(reqs) == 0 { + t.Fatalf("no Reserve recorded for %s", id) + } + return reqs[len(reqs)-1] +} + +// fakeValidationReader is the grounding chain fake. 
+type fakeValidationReader struct { + statuses map[common.Hash]erc8004.ValidationStatus + readErr error +} + +func (f *fakeValidationReader) ValidationStatus(_ context.Context, h common.Hash) (erc8004.ValidationStatus, error) { + if f.readErr != nil { + return erc8004.ValidationStatus{}, f.readErr + } + return f.statuses[h], nil +} + +func stubValidationReader(t *testing.T, reader bountyValidationReader, dialErr error) { + t.Helper() + orig := bountyValidationReaderFactory + bountyValidationReaderFactory = func(context.Context, string, string) (bountyValidationReader, func(), error) { + if dialErr != nil { + return nil, nil, dialErr + } + return reader, func() {}, nil + } + t.Cleanup(func() { bountyValidationReaderFactory = orig }) +} + +func stubEscalationPanel(t *testing.T, panel []monetizeapi.ServiceBountyPanelSeat, err error) { + t.Helper() + orig := selectEscalationPanelFn + selectEscalationPanelFn = func(*Controller, context.Context, *unstructured.Unstructured, int, map[string]bool) ([]monetizeapi.ServiceBountyPanelSeat, error) { + return panel, err + } + t.Cleanup(func() { selectEscalationPanelFn = orig }) +} + +// ── helpers ───────────────────────────────────────────────────────────────── + +func r1Addr(i int) string { + return common.HexToAddress(fmt.Sprintf("0x%040x", 0xe100+i)).Hex() +} + +func r1Panel(size int) []monetizeapi.ServiceBountyPanelSeat { + seats := make([]monetizeapi.ServiceBountyPanelSeat, 0, size) + for i := 0; i < size; i++ { + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: r1Addr(i), Seat: monetizeapi.PanelSeatFull}) + } + return seats +} + +// addRound0 writes commit+reveal annotation pairs for a direct +// reconcileEvalMarket invocation (commits promote and reveals grade in the +// same pass once K commitments are present). 
+func addRound0(annotations map[string]string, scores map[string]int64) { + for addr, score := range scores { + annotations[bountyEvalCommitPrefix+addr] = monetizeapi.EvalCommitHash(score, "salt-"+addr, addr) + annotations[bountyEvalRevealPrefix+addr] = fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr) + } +} + +func addRound1Commits(annotations map[string]string, scores map[string]int64) { + for addr, score := range scores { + annotations[bountyEvalCommitR1Prefix+addr] = monetizeapi.EvalCommitHash(score, "r1salt-"+addr, addr) + } +} + +func addRound1Reveals(annotations map[string]string, scores map[string]int64) { + for addr, score := range scores { + annotations[bountyEvalRevealR1Prefix+addr] = fmt.Sprintf(`{"score":%d,"salt":"r1salt-%s"}`, score, addr) + } +} + +func commitAndRevealR1(t *testing.T, c *Controller, ns, name string, scores map[string]int64) { + t.Helper() + key := ns + "/" + name + for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + bountyEvalCommitR1Prefix + addr: monetizeapi.EvalCommitHash(score, "r1salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) + for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + bountyEvalRevealR1Prefix + addr: fmt.Sprintf(`{"score":%d,"salt":"r1salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) +} + +func bountyConditionMessage(conditions []monetizeapi.Condition, conditionType string) string { + for _, condition := range conditions { + if condition.Type == conditionType { + return condition.Message + } + } + return "" +} + +// ── trigger (pure) ────────────────────────────────────────────────────────── + +func revealedEval(addr string, score int64, withinBand bool) monetizeapi.ServiceBountyEvaluation { + return monetizeapi.ServiceBountyEvaluation{Address: addr, Phase: evalPhaseRevealed, Score: score, WithinBand: withinBand} +} + +func TestEscalationTrigger_Dispersion(t *testing.T) { + // ceil(3/2)=2 
out-of-band reveals trigger; 1 does not. + one := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 85, true), revealedEval(evalB, 90, true), revealedEval(evalC, 20, false), + } + if got := escalationTrigger(one, 3, 85, 5); got != "" { + t.Fatalf("1 of 3 out of band must not trigger, got %q", got) + } + two := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 0, false), revealedEval(evalB, 75, true), revealedEval(evalC, 100, false), + } + got := escalationTrigger(two, 3, 75, 5) + if !strings.Contains(got, "dispersion") { + t.Fatalf("2 of 3 out of band must trigger dispersion, got %q", got) + } + + // Non-reveals are penalized, not dispersion: they never count. + nonReveals := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 85, true), + {Address: evalB, Phase: evalPhaseNonReveal, WithinBand: false}, + {Address: evalC, Phase: evalPhaseNonReveal, WithinBand: false}, + } + if got := escalationTrigger(nonReveals, 3, 85, 5); got != "" { + t.Fatalf("non-reveals must not count toward dispersion, got %q", got) + } + + // Shadow seats never count either. 
+ shadow := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 85, true), + {Address: evalB, Phase: evalPhaseRevealed, Score: 0, WithinBand: false, Seat: monetizeapi.PanelSeatShadow}, + {Address: evalC, Phase: evalPhaseRevealed, Score: 100, WithinBand: false, Seat: monetizeapi.PanelSeatShadow}, + } + if got := escalationTrigger(shadow, 3, 85, 5); got != "" { + t.Fatalf("shadow divergence must not trigger dispersion, got %q", got) + } +} + +func TestEscalationTrigger_KnifeEdge(t *testing.T) { + inBand := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 52, true), revealedEval(evalB, 53, true), revealedEval(evalC, 54, true), + } + if got := escalationTrigger(inBand, 3, 53, 5); !strings.Contains(got, "knife-edge") { + t.Fatalf("median 53 within 5 of 50 must trigger knife-edge, got %q", got) + } + if got := escalationTrigger(inBand, 3, 56, 5); got != "" { + t.Fatalf("median 56 is outside epsilon 5, got %q", got) + } + // |median-threshold| == epsilon is inclusive. + if got := escalationTrigger(inBand, 3, 45, 5); !strings.Contains(got, "knife-edge") { + t.Fatalf("median 45 at exactly epsilon 5 must trigger, got %q", got) + } +} + +func TestEscalationTrigger_EpsilonZeroDisablesKnifeEdge(t *testing.T) { + dead := []monetizeapi.ServiceBountyEvaluation{ + revealedEval(evalA, 50, true), revealedEval(evalB, 50, true), revealedEval(evalC, 50, true), + } + if got := escalationTrigger(dead, 3, 50, 0); got != "" { + t.Fatalf("epsilon 0 must disable the knife-edge trigger, got %q", got) + } + if got := escalationTrigger(dead, 3, 50, 5); got == "" { + t.Fatal("epsilon 5 with a dead-center median must trigger") + } +} + +// ── escalation lifecycle (e2e through reconcileBounty) ───────────────────── + +func TestEscalation_DispersionTriggersAndRound1MedianIsFinal(t *testing.T) { + sb := testEvalBounty("escalate") + c := newBountyTestController(t, sb) + fake := newFakeEscrow() + c.bountyEscrow = fake + stubEscalationPanel(t, r1Panel(7), nil) + ns := 
"hermes-obol-agent" + + claimAndSubmit(t, c, ns, "escalate") + // Round 0: median 75 (would PASS), but 0 and 100 are out of band → + // dispersion (2 ≥ ⌈3/2⌉). + commitAndReveal(t, c, ns, "escalate", map[string]int64{evalA: 0, evalB: 75, evalC: 100}) + + got := getBounty(t, c, ns, "escalate") + esc := got.Status.Escalation + if esc == nil { + t.Fatal("escalation must open on dispersion") + } + if esc.Round != 1 || !strings.Contains(esc.Reason, "dispersion") { + t.Fatalf("escalation = round %d reason %q, want round 1 dispersion", esc.Round, esc.Reason) + } + if len(esc.Panel) != 7 { + t.Fatalf("round-1 panel size = %d, want 2k+1 = 7", len(esc.Panel)) + } + if esc.BudgetState != escrow.StateReserved { + t.Fatalf("escalation budget = %q, want Reserved (fake funds without voucher)", esc.BudgetState) + } + if reason := conditionReason(got.Status.Conditions, "Verified"); reason == "EvaluatorQuorum" { + t.Fatal("the EvaluatorQuorum verdict must NOT be spoken while the escalation is open") + } + // 7 seats × full 2.00 — no probation discount in round 1. + if req := fake.lastReserve(t, "uid-escalate-eval-r1"); req.Amount != "14.00" { + t.Fatalf("round-1 reserve amount = %q, want 14.00", req.Amount) + } + + // Round 1: median 30 → the ROUND-0 pass is overridden; round-1 is final. 
// TestEscalation_KnifeEdgeTriggers drives a bounty through claim, submit and a
// round-0 commit/reveal whose scores are tightly clustered just above the pass
// threshold, then asserts that an escalation was opened with a knife-edge
// reason and that the EvaluatorQuorum verdict has not been recorded yet.
func TestEscalation_KnifeEdgeTriggers(t *testing.T) {
	sb := testEvalBounty("knife")
	c := newBountyTestController(t, sb)
	c.bountyEscrow = newFakeEscrow()
	// Pin the round-1 panel so the escalation does not depend on real panel
	// selection.
	stubEscalationPanel(t, r1Panel(7), nil)
	ns := "hermes-obol-agent"

	claimAndSubmit(t, c, ns, "knife")
	// Median 53 — all in band (no dispersion) but within epsilon 5 of 50.
	commitAndReveal(t, c, ns, "knife", map[string]int64{evalA: 52, evalB: 53, evalC: 54})

	got := getBounty(t, c, ns, "knife")
	if got.Status.Escalation == nil {
		t.Fatal("knife-edge median must escalate")
	}
	if !strings.Contains(got.Status.Escalation.Reason, "knife-edge") {
		t.Fatalf("escalation reason = %q, want knife-edge", got.Status.Escalation.Reason)
	}
	// While the escalation is open the quorum verdict must not be present.
	if reason := conditionReason(got.Status.Conditions, "Verified"); reason == "EvaluatorQuorum" {
		t.Fatal("verdict must wait for the escalation round")
	}
}
// TestEscalation_ExcludesRound0PanelAndFulfiller replaces the escalation panel
// selector with a recording stub and asserts what the controller asks it for:
// a panel of 7 seats, and an exclude set that contains the three round-0
// evaluators plus the 0x2222… address (presumably the fulfiller address used
// by claimAndSubmit — confirm against that helper).
func TestEscalation_ExcludesRound0PanelAndFulfiller(t *testing.T) {
	sb := testEvalBounty("exclude")
	c := newBountyTestController(t, sb)
	c.bountyEscrow = newFakeEscrow()

	// Capture the arguments the controller passes to the panel selector; the
	// original hook is restored when the test finishes.
	var gotSize int
	var gotExclude map[string]bool
	orig := selectEscalationPanelFn
	selectEscalationPanelFn = func(_ *Controller, _ context.Context, _ *unstructured.Unstructured, size int, exclude map[string]bool) ([]monetizeapi.ServiceBountyPanelSeat, error) {
		gotSize = size
		gotExclude = exclude
		return r1Panel(7), nil
	}
	t.Cleanup(func() { selectEscalationPanelFn = orig })

	ns := "hermes-obol-agent"
	claimAndSubmit(t, c, ns, "exclude")
	// Same dispersed round-0 scores the other lifecycle tests use to force an
	// escalation.
	commitAndReveal(t, c, ns, "exclude", map[string]int64{evalA: 0, evalB: 75, evalC: 100})

	if gotSize != 7 {
		t.Fatalf("escalation panel size = %d, want 2k+1 = 7", gotSize)
	}
	// The exclude set is looked up by the canonical (checksummed) form of each
	// address.
	for _, addr := range []string{evalA, evalB, evalC, "0x2222222222222222222222222222222222222222"} {
		if !gotExclude[common.HexToAddress(addr).Hex()] {
			t.Errorf("exclude set must contain %s (round-0 participant or fulfiller)", addr)
		}
	}
}
time.Now() + requeue := c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0) + if status.Escalation == nil || status.Escalation.BudgetState != escrowStateAwaitingVoucher { + t.Fatalf("escalation = %+v, want AwaitingVoucher", status.Escalation) + } + if reason := conditionReason(status.Conditions, "Verified"); reason != "" { + t.Fatalf("no verdict may be spoken while the escalation awaits funding, got reason %q", reason) + } + if reason := conditionReason(status.Conditions, "Escalated"); reason != "EscrowAwaitingVoucher" { + t.Fatalf("Escalated reason = %q, want EscrowAwaitingVoucher", reason) + } + if requeue <= 0 { + t.Fatal("an awaiting-voucher escalation must requeue for its deadline") + } + + // Past the escalation window (benchmark@v1 ladder: 30m) with no voucher: + // Unfunded, and the round-0 median (75 → pass) stands. + c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0.Add(31*time.Minute)) + if status.Escalation.BudgetState != escrowStateUnfunded { + t.Fatalf("escalation budget = %q, want Unfunded", status.Escalation.BudgetState) + } + if reason := conditionReason(status.Conditions, "Escalated"); reason != "EscalationUnfunded" { + t.Fatalf("Escalated reason = %q, want EscalationUnfunded", reason) + } + if !bountyConditionIsTrue(status.Conditions, "Verified") { + t.Fatal("round-0 median 75 must verify when the escalation goes unfunded") + } + if reason := conditionReason(status.Conditions, "Verified"); reason != "EvaluatorQuorum" { + t.Fatalf("Verified reason = %q, want EvaluatorQuorum", reason) + } + if status.WeightedScore != 75 { + t.Fatalf("WeightedScore = %d, want round-0 median 75", status.WeightedScore) + } + if msg := bountyConditionMessage(status.Conditions, "Verified"); strings.Contains(msg, "escalated") { + t.Fatalf("an unfunded escalation must not claim a round-1 verdict: %q", msg) + } + if len(status.Escalation.Evaluations) != 0 { + t.Fatal("an unfunded escalation must never run a round-1 cycle") + } 
// TestEscalation_LateVoucherFundsRound1 calls reconcileEvalMarket directly
// with controlled timestamps. The fake escrow is told to require a voucher for
// the round-1 reservation, so the first pass leaves the escalation budget in
// AwaitingVoucher; once the round-1 voucher annotation is present, a second
// pass five minutes later must re-reserve with the decoded voucher attached,
// move the budget to Reserved and report the Escalated condition as
// EscalationFunded.
func TestEscalation_LateVoucherFundsRound1(t *testing.T) {
	c := newBountyTestController(t)
	fake := newFakeEscrow()
	// The fake answers AwaitingVoucher for this id until a Reserve carries a
	// voucher.
	fake.requireVoucher["uid-late-eval-r1"] = true
	c.bountyEscrow = fake
	stubEscalationPanel(t, r1Panel(7), nil)

	sb := testEvalBounty("late")
	status := &monetizeapi.ServiceBountyStatus{}
	annotations := map[string]string{}
	// Dispersed round-0 scores so the first pass opens an escalation.
	addRound0(annotations, map[string]int64{evalA: 0, evalB: 75, evalC: 100})

	now0 := time.Now()
	c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0)
	if status.Escalation.BudgetState != escrowStateAwaitingVoucher {
		t.Fatalf("budget = %q, want AwaitingVoucher", status.Escalation.BudgetState)
	}

	// The voucher annotation ferries in before the deadline → RE-reserve
	// picks it up and the budget funds.
	annotations[bountyEvalVoucherR1Annotation] = `{"owner":"0x1111111111111111111111111111111111111111","token":"0x036CbD53842c5426634e7929541eC2318f3dCF7e","network":"base","spender":"0xFAC0000000000000000000000000000000000FAC","nonce":"42","deadline":1893456000,"recipients":[{"address":"` + r1Addr(0) + `","amount":"2000000"}],"signature":"0xsig"}`
	c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0.Add(5*time.Minute))
	if status.Escalation.BudgetState != escrow.StateReserved {
		t.Fatalf("budget = %q, want Reserved after the voucher arrives", status.Escalation.BudgetState)
	}
	// The most recent Reserve recorded by the fake must carry the voucher with
	// its fields intact.
	req := fake.lastReserve(t, "uid-late-eval-r1")
	if req.Voucher == nil {
		t.Fatal("re-reserve must attach the ferried voucher")
	}
	if req.Voucher.Nonce != "42" || req.Voucher.Owner != "0x1111111111111111111111111111111111111111" {
		t.Fatalf("voucher fields not ferried intact: %+v", req.Voucher)
	}
	if reason := conditionReason(status.Conditions, "Escalated"); reason != "EscalationFunded" {
		t.Fatalf("Escalated reason = %q, want EscalationFunded", reason)
	}
}
newFakeEscrow() + c.bountyEscrow = fake + stubEscalationPanel(t, r1Panel(7), nil) + + sb := testEvalBounty("r1silent") + status := &monetizeapi.ServiceBountyStatus{} + annotations := map[string]string{} + addRound0(annotations, map[string]int64{evalA: 0, evalB: 75, evalC: 100}) + + now0 := time.Now() + c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0) + if status.Escalation == nil || status.Escalation.BudgetState != escrow.StateReserved { + t.Fatalf("escalation = %+v, want funded", status.Escalation) + } + + // All 7 commit; the reveal window opens. + r1Scores := map[string]int64{} + for i := 0; i < 7; i++ { + r1Scores[r1Addr(i)] = 80 + } + addRound1Commits(annotations, r1Scores) + c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0.Add(time.Minute)) + if status.Escalation.RevealDeadline == nil { + t.Fatal("round-1 reveal window must open once all 2k+1 commitments are in") + } + + // Only 6 reveal. Before the deadline the round is not settled. + silent := r1Addr(6) + revealed := map[string]int64{} + for addr, score := range r1Scores { + if addr != silent { + revealed[addr] = score + } + } + addRound1Reveals(annotations, revealed) + c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0.Add(2*time.Minute)) + if reason := conditionReason(status.Conditions, "Verified"); reason == "EvaluatorQuorum" { + t.Fatal("round 1 must not settle while a commitment is unrevealed inside the window") + } + + // Past the round-1 reveal window: the silent seat grades NonReveal — + // worst-case outlier, unpaid — and the median settles over the 6 reveals. 
+ c.reconcileEvalMarket(context.Background(), sb, annotations, status, now0.Add(20*time.Minute)) + if !bountyConditionIsTrue(status.Conditions, "Verified") { + t.Fatal("round-1 median 80 must verify") + } + if status.WeightedScore != 80 { + t.Fatalf("WeightedScore = %d, want 80", status.WeightedScore) + } + var silentEval *monetizeapi.ServiceBountyEvaluation + for i := range status.Escalation.Evaluations { + if status.Escalation.Evaluations[i].Address == silent { + silentEval = &status.Escalation.Evaluations[i] + } + } + if silentEval == nil { + t.Fatal("silent round-1 evaluator missing from escalation evaluations") + } + if silentEval.Phase != evalPhaseNonReveal { + t.Fatalf("silent evaluator phase = %q, want NonReveal", silentEval.Phase) + } + if silentEval.WithinBand { + t.Fatal("a round-1 non-reveal must grade as a worst-case outlier") + } + if silentEval.Paid { + t.Fatal("a round-1 non-reveal must not be paid") + } + if len(fake.batches["uid-r1silent-eval-r1"]) != 6 { + t.Fatalf("round-1 batch = %d recipients, want 6 (non-reveal earns nothing)", len(fake.batches["uid-r1silent-eval-r1"])) + } +} + +// ── grounding ─────────────────────────────────────────────────────────────── + +func TestGrounding_Matrix(t *testing.T) { + const score = int64(90) + canonical := common.HexToAddress(evalA) + + cases := []struct { + name string + statuses map[common.Hash]erc8004.ValidationStatus + dialErr error + readErr error + grounded bool + wantReason string + wantInMsg string + }{ + { + name: "match grounds", + statuses: map[common.Hash]erc8004.ValidationStatus{ + erc8004.BountyEvalRequestHash("uid-ground", canonical.Hex()): {ValidatorAddress: canonical, Response: 90}, + }, + grounded: true, + wantReason: "Grounded", + }, + { + name: "wrong responder stays ungrounded", + statuses: map[common.Hash]erc8004.ValidationStatus{ + erc8004.BountyEvalRequestHash("uid-ground", canonical.Hex()): {ValidatorAddress: common.HexToAddress(evalB), Response: 90}, + }, + wantReason: 
"NotGrounded", + wantInMsg: "not the evaluator", + }, + { + name: "wrong score stays ungrounded", + statuses: map[common.Hash]erc8004.ValidationStatus{ + erc8004.BountyEvalRequestHash("uid-ground", canonical.Hex()): {ValidatorAddress: canonical, Response: 10}, + }, + wantReason: "NotGrounded", + wantInMsg: "on-chain response 10", + }, + { + name: "no on-chain entry stays ungrounded", + statuses: map[common.Hash]erc8004.ValidationStatus{}, + wantReason: "NotGrounded", + wantInMsg: "no on-chain validation entry", + }, + { + name: "chain down stays ungrounded", + dialErr: errors.New("erpc unreachable"), + wantReason: "ChainUnreachable", + wantInMsg: "unreachable", + }, + { + name: "chain read error stays ungrounded", + statuses: nil, + readErr: errors.New("rpc timeout"), + wantReason: "NotGrounded", + wantInMsg: "chain read failed", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + stubValidationReader(t, &fakeValidationReader{statuses: tc.statuses, readErr: tc.readErr}, tc.dialErr) + + c := newBountyTestController(t) + c.bountyEscrow = newFakeEscrow() + sb := testEvalBounty("ground") + sb.Spec.Eval.K = 1 + status := &monetizeapi.ServiceBountyStatus{} + annotations := map[string]string{ + bountyEvalCommitPrefix + evalA: monetizeapi.EvalCommitHash(score, "salt-g", evalA), + bountyEvalRevealPrefix + evalA: fmt.Sprintf(`{"score":%d,"salt":"salt-g","validationTx":"0xfeed"}`, score), + } + c.reconcileEvalMarket(context.Background(), sb, annotations, status, time.Now()) + + if len(status.Evaluations) != 1 { + t.Fatalf("evaluations = %d, want 1", len(status.Evaluations)) + } + if status.Evaluations[0].Grounded != tc.grounded { + t.Fatalf("Grounded = %v, want %v", status.Evaluations[0].Grounded, tc.grounded) + } + if reason := conditionReason(status.Conditions, "EvalGrounded"); reason != tc.wantReason { + t.Fatalf("EvalGrounded reason = %q, want %q", reason, tc.wantReason) + } + if tc.wantInMsg != "" { + if msg := 
// TestGrounding_NoValidationTxDialsNothing reconciles a single-evaluator round
// whose reveal payload has no validationTx field and asserts two things: the
// chain reader factory is never invoked, and no EvalGrounded condition is
// written to the status.
func TestGrounding_NoValidationTxDialsNothing(t *testing.T) {
	// A reveal without validationTx must never dial the chain: the factory
	// stub fails the test if invoked.
	orig := bountyValidationReaderFactory
	bountyValidationReaderFactory = func(context.Context, string, string) (bountyValidationReader, func(), error) {
		t.Fatal("grounding must not dial the chain when no reveal carries validationTx")
		return nil, nil, nil
	}
	t.Cleanup(func() { bountyValidationReaderFactory = orig })

	c := newBountyTestController(t)
	c.bountyEscrow = newFakeEscrow()
	sb := testEvalBounty("nodial")
	// NOTE(review): K = 1 presumably lets the single commitment below close the
	// commit window — confirm against the reconciler.
	sb.Spec.Eval.K = 1
	status := &monetizeapi.ServiceBountyStatus{}
	// One commit/reveal pair for evalA; the reveal deliberately omits
	// validationTx.
	annotations := map[string]string{
		bountyEvalCommitPrefix + evalA: monetizeapi.EvalCommitHash(90, "s", evalA),
		bountyEvalRevealPrefix + evalA: `{"score":90,"salt":"s"}`,
	}
	c.reconcileEvalMarket(context.Background(), sb, annotations, status, time.Now())
	if reason := conditionReason(status.Conditions, "EvalGrounded"); reason != "" {
		t.Fatalf("EvalGrounded condition must not exist without validationTx claims, got %q", reason)
	}
}
+func TestBountyEscrowGateway_ConfigFromEnvOnly(t *testing.T) { + t.Setenv("OBOL_BOUNTY_ESCROW_URL", "https://facilitator.internal.example") + t.Setenv("OBOL_BOUNTY_ESCROW_TOKEN", "release-authority-token") + gateway := newBountyEscrowGateway() + httpGateway, ok := gateway.(*escrow.HTTPGateway) + if !ok { + t.Fatalf("gateway = %T, want *escrow.HTTPGateway when env is set", gateway) + } + if httpGateway.Base != "https://facilitator.internal.example" || httpGateway.Token != "release-authority-token" { + t.Fatalf("gateway config = %q/%q, want env values", httpGateway.Base, httpGateway.Token) + } + + t.Setenv("OBOL_BOUNTY_ESCROW_URL", "") + if _, ok := newBountyEscrowGateway().(*escrow.LedgerGateway); !ok { + t.Fatal("no env URL must fall back to the dev ledger") + } +} + +func TestBountyEscrow_AnnotationsCannotRedirectGateway(t *testing.T) { + fake := newFakeEscrow() + sb := testBounty("hostile") + c := newBountyTestController(t, sb) + c.bountyEscrow = fake + ns := "hermes-obol-agent" + + // Hostile annotations trying to smuggle endpoint/credential config (and a + // voucher whose unknown fields are ignored by the typed decode). + annotateBounty(t, c, ns, "hostile", map[string]string{ + "obol.org/escrow-url": "http://attacker.example", + "obol.org/escrow-token": "stolen", + "obol.org/escrow-facilitator": "http://attacker.example", + bountyRewardVoucherAnnotation: `{"owner":"0x1111111111111111111111111111111111111111","base":"http://attacker.example","token":"0x036CbD53842c5426634e7929541eC2318f3dCF7e","nonce":"1","deadline":1,"signature":"0x00"}`, + }) + reconcileBountyUntilSettled(t, c, ns+"/hostile") + + // The injected gateway received the reserve — the annotations selected + // nothing. The voucher decoded only its typed Permit2 fields. 
+ req := fake.lastReserve(t, "uid-hostile") + if req.Voucher == nil || req.Voucher.Owner != "0x1111111111111111111111111111111111111111" { + t.Fatalf("voucher not ferried: %+v", req.Voucher) + } + got := getBounty(t, c, ns, "hostile") + if got.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q, want Reserved via the env-configured gateway", got.Status.EscrowState) + } +} diff --git a/internal/serviceoffercontroller/bounty_eval.go b/internal/serviceoffercontroller/bounty_eval.go new file mode 100644 index 00000000..85546772 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_eval.go @@ -0,0 +1,634 @@ +package serviceoffercontroller + +// Eval-market pass — the verification-by-default slice (design doc §11). +// +// Evaluators interact through per-address annotations (the same k8s-native +// write channel as claim/submit, keyed per evaluator so concurrent writers +// never last-write-wins each other): +// +// obol.org/eval-commit- = EvalCommitHash(score, salt, addr) +// obol.org/eval-reveal- = {"score":N,"salt":"…"} +// +// Discipline (the research amendments, plans/evaluator-market-research-notes.md): +// - commitments are ADDRESS-BOUND (Kleros §4.3) — copying another +// evaluator's commit hash makes your own reveal unverifiable; +// - no reveal is processed until K commitments are in (commit window +// closes before any reveal opens); +// - a missing reveal past the reveal window is graded as a worst-case +// outlier (nonRevealPenalty) — silent abstention is never the cheap exit; +// - quorum = MEDIAN of revealed scores (robust to one outlier, which is +// what makes the future probation seat verdict-safe); +// - WithinBand records divergence from the median per evaluator — the +// per-bounty bookkeeping hook the reputation ladder keys on; +// - a diverged or knife-edge round 0 escalates ONCE to a fresh 2k+1 panel +// whose median is final (bounty_escalation.go); +// - reveals carrying a validationTx are grounded against the on-chain 
+// ERC-8004 entry before ladder bookkeeping (bounty_grounding.go). +// +// Money legs ferried here: Permit2 vouchers ride in on annotations +// (obol.org/{reward,bond,eval}-voucher[-r1]) and are attached to the matching +// escrow ReserveRequest. The controller still signs NOTHING — a voucher is a +// poster-signed authorization the facilitator executes; the annotation channel +// can never carry escrow endpoint or credential config (that comes ONLY from +// controller env, see newBountyEscrowGateway). + +import ( + "context" + "encoding/json" + "fmt" + "log" + "math/big" + "sort" + "strconv" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + "github.com/ethereum/go-ethereum/common" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + bountyEvalCommitPrefix = "obol.org/eval-commit-" + bountyEvalRevealPrefix = "obol.org/eval-reveal-" + + evalPhaseCommitted = "Committed" + evalPhaseRevealed = "Revealed" + evalPhaseBadReveal = "BadReveal" + evalPhaseNonReveal = "NonReveal" + + // evalPassThreshold: median revealed score (0-100, ERC-8004 + // validationResponse semantics) at or above this verifies the submission. + evalPassThreshold = 50 + + // evalOutlierBand: a revealed score further than this from the median is + // marked WithinBand=false (the divergence penalty reputation keys on). + evalOutlierBand = 20 + + // defaultRevealWindow guards against a task package with a missing or + // unparseable ladder.revealWindow. + defaultRevealWindow = 10 * time.Minute +) + +// Voucher ferry: annotations carrying a JSON-encoded escrow.Permit2Voucher, +// signed by the poster's agent and attached to the matching ReserveRequest. 
+const ( + bountyRewardVoucherAnnotation = "obol.org/reward-voucher" + bountyBondVoucherAnnotation = "obol.org/bond-voucher" + bountyEvalVoucherAnnotation = "obol.org/eval-voucher" + bountyEvalVoucherR1Annotation = "obol.org/eval-voucher-r1" + + // escrowStateAwaitingVoucher is the facilitator's "request verified, + // waiting for the signed Permit2 voucher" reservation state. Reserves in + // this state re-run on later reconciles (idempotent at the facilitator) + // until the voucher annotation ferries in. + escrowStateAwaitingVoucher = "AwaitingVoucher" + + // escrowStateUnfunded parks an escalation whose voucher never arrived + // before the escalation window closed — the round-0 median stands. + escrowStateUnfunded = "Unfunded" +) + +// bountyEvalReveal is the eval-reveal annotation payload. ValidationTx is the +// optional ERC-8004 validationResponse transaction the evaluator submitted +// with their OWN wallet — recorded as provenance, never required. +type bountyEvalReveal struct { + Score int64 `json:"score"` + Salt string `json:"salt"` + ValidationTx string `json:"validationTx,omitempty"` +} + +// evalMarketActive reports whether quorum verification applies: skipped mode +// and poster-manual acceptance both leave the poster as the judge. +func evalMarketActive(sb *monetizeapi.ServiceBounty) bool { + return sb.Spec.Eval.Mode != monetizeapi.EvalModeDangerouslySkipped && + sb.Spec.Acceptance.Method != "poster-manual" +} + +// evalRoundIO bundles one commit-reveal round: the annotation prefixes it +// reads and the status fields it mutates. Round 0 points at the top-level +// status fields; round 1 points into status.escalation. Same engine, same +// semantics (address-bound commits, K-gated reveal window, non-reveal = +// worst-case outlier). +type evalRoundIO struct { + commitPrefix string + revealPrefix string + // seats maps canonical (EIP-55) address → seat kind. 
With restrict=true + // only seated addresses are admitted (panel mode); otherwise the door is + // open (round-0 fallback when the enrolled pool is too small). + seats map[string]string + restrict bool + // k counting commitments close the commit window and open the reveal + // window. + k int64 + window time.Duration + evaluations *[]monetizeapi.ServiceBountyEvaluation + deadline **metav1.Time +} + +// runEvalRound drives one commit-reveal round over the annotation channel. +// It reports whether the round settled (every commitment graded, or the +// reveal window closed) and a positive requeue duration when the reveal +// window was just opened. +func runEvalRound(annotations map[string]string, round evalRoundIO, now time.Time) (settled bool, requeue time.Duration) { + // 1. Promote commitments (first write wins per address — a commitment is + // binding; later annotation edits must not rewrite history). + for key, value := range annotations { + addr, ok := strings.CutPrefix(key, round.commitPrefix) + if !ok || !common.IsHexAddress(addr) { + continue + } + canonical := common.HexToAddress(addr).Hex() + seat := "" + if round.restrict { + s, selected := round.seats[canonical] + if !selected { + continue // not on the panel — the open door is closed + } + seat = s + } + if findEvaluation(*round.evaluations, canonical) != nil { + continue + } + *round.evaluations = append(*round.evaluations, monetizeapi.ServiceBountyEvaluation{ + Address: canonical, + CommitHash: strings.TrimSpace(value), + Phase: evalPhaseCommitted, + Seat: seat, + }) + } + evaluations := *round.evaluations + sort.Slice(evaluations, func(i, j int) bool { + return evaluations[i].Address < evaluations[j].Address + }) + + // 2. The commit window closes (and the reveal window opens) only when K + // COUNTING commitments are in (shadows never gate the window). No reveal + // is graded before that instant. 
+ if *round.deadline == nil { + counting := int64(0) + for _, evaluation := range evaluations { + if evaluation.Seat != monetizeapi.PanelSeatShadow { + counting++ + } + } + if counting < round.k { + return false, 0 + } + deadline := metav1.NewTime(now.Add(round.window)) + *round.deadline = &deadline + requeue = time.Until(deadline.Time) + time.Second + } + + // 3. Grade reveals against the address-bound commitment. + for key, value := range annotations { + addr, ok := strings.CutPrefix(key, round.revealPrefix) + if !ok || !common.IsHexAddress(addr) { + continue + } + evaluation := findEvaluation(evaluations, common.HexToAddress(addr).Hex()) + if evaluation == nil || evaluation.Phase != evalPhaseCommitted { + continue + } + var reveal bountyEvalReveal + if err := json.Unmarshal([]byte(value), &reveal); err != nil { + evaluation.Phase = evalPhaseBadReveal + continue + } + if monetizeapi.EvalCommitHash(reveal.Score, reveal.Salt, evaluation.Address) != evaluation.CommitHash { + evaluation.Phase = evalPhaseBadReveal + continue + } + revealedAt := metav1.NewTime(now) + evaluation.Phase = evalPhaseRevealed + evaluation.Score = reveal.Score + evaluation.RevealedAt = &revealedAt + evaluation.ValidationTxHash = strings.TrimSpace(reveal.ValidationTx) + } + + // 4. Past the reveal window, missing reveals become worst-case outliers. + deadlinePassed := now.After((*round.deadline).Time) + if deadlinePassed { + for i := range evaluations { + if evaluations[i].Phase == evalPhaseCommitted { + evaluations[i].Phase = evalPhaseNonReveal + } + } + } + + // 5. The round settles when every commitment is graded (all revealed + // early) or the reveal window has closed. 
+ settled = deadlinePassed + if !settled { + settled = true + for _, evaluation := range evaluations { + if evaluation.Phase == evalPhaseCommitted { + settled = false + break + } + } + } + return settled, requeue +} + +// reconcileEvalMarket promotes commit/reveal annotations into status and, once +// the quorum settles (running at most one escalation round first), writes the +// Verified condition with reason EvaluatorQuorum. Returns a positive duration +// when the bounty should be requeued (reveal-window or escalation-window +// expiry). +func (c *Controller) reconcileEvalMarket(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus, now time.Time) time.Duration { + // 0. Panel selection (once) + eval-budget reservation. The budget is the + // SEPARATE OBOL leg: k × perEvaluator, poster-funded, paid to evaluators + // win-or-lose. + c.ensurePanel(ctx, sb, status) + c.reserveEvalBudget(ctx, sb, annotations, status) + + // Seat lookup is by CANONICAL (EIP-55) address — enrollments may carry any + // case, annotations another; HexToAddress.Hex() is the one true form. + panelSeats := make(map[string]string, len(status.EvaluatorPanel)) + for _, seat := range status.EvaluatorPanel { + panelSeats[common.HexToAddress(seat.Address).Hex()] = seat.Seat + } + + k := evalQuorumK(sb) + settled, requeue := runEvalRound(annotations, evalRoundIO{ + commitPrefix: bountyEvalCommitPrefix, + revealPrefix: bountyEvalRevealPrefix, + seats: panelSeats, + restrict: len(panelSeats) > 0, + k: k, + window: revealWindow(sb), + evaluations: &status.Evaluations, + deadline: &status.RevealDeadline, + }, now) + if !settled { + return requeue + } + + // Median over COUNTING reveals only — shadows are graded against it but + // never move it (the free reputation on-ramp can't sway verdicts). 
+ scores := countingScores(status.Evaluations) + if len(scores) == 0 { + setPurchaseCondition(&status.Conditions, "Verified", "False", "EvaluatorQuorum", + "No valid reveals — submission unverifiable; poster may override or the deadline refunds") + return requeue + } + + median := medianInt64(scores) + markOutlierBands(status.Evaluations, median) + + // Escalation trigger — checked after every counting reveal is graded and + // BEFORE the EvaluatorQuorum verdict is spoken. Single-round latch: + // status.escalation, once set, is never re-opened; a spoken + // EvaluatorQuorum verdict latches the thin-pool fallthrough so a pool + // that grows later can never re-open a settled bounty. + quorumAlreadySpoke := conditionReason(status.Conditions, "Verified") == "EvaluatorQuorum" + if sb.Spec.Eval.Mode == monetizeapi.EvalModeRequired && status.Escalation == nil && !quorumAlreadySpoke { + if reason := escalationTrigger(status.Evaluations, k, median, escalationEpsilon(sb)); reason != "" { + if opened, retry := c.openEscalation(ctx, sb, annotations, status, reason, now); !opened && retry { + // Transient selection failure — verdict not spoken. The + // deadline requeue may be 0 here (reveal deadline already + // passed), so schedule the retry explicitly or a deadline-less + // bounty would wait for an external event. + return maxDuration(requeue, seedRetryDelay) + } + } + } + + finalMedian := median + finalReveals := len(scores) + escalated := false + if esc := status.Escalation; esc != nil { + done, escRequeue := c.runEscalation(ctx, sb, annotations, status, now) + if !done { + return maxDuration(requeue, escRequeue) + } + if r1Scores := countingScores(esc.Evaluations); len(r1Scores) > 0 { + // The round-1 median over the 2k+1 panel is FINAL. 
+ finalMedian = medianInt64(r1Scores) + finalReveals = len(r1Scores) + markOutlierBands(esc.Evaluations, finalMedian) + escalated = true + } else if len(esc.Evaluations) > 0 { + markOutlierBands(esc.Evaluations, median) + setPurchaseCondition(&status.Conditions, "Escalated", "True", "EscalationNoReveals", + "Round-1 panel produced no valid reveals — the round-0 median stands") + } + } + + escalationNote := "" + if escalated { + escalationNote = fmt.Sprintf(" — escalated (%s); round-1 median is final", status.Escalation.Reason) + } + status.WeightedScore = finalMedian + if finalMedian >= evalPassThreshold { + setPurchaseCondition(&status.Conditions, "Verified", "True", "EvaluatorQuorum", + fmt.Sprintf("Median score %d/100 from %d reveal(s) meets the %d threshold%s", finalMedian, finalReveals, evalPassThreshold, escalationNote)) + if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseSubmitted { + status.Claims[0].Phase = bountyPhaseVerified + } + } else { + setPurchaseCondition(&status.Conditions, "Verified", "False", "EvaluatorQuorum", + fmt.Sprintf("Median score %d/100 from %d reveal(s) is below the %d threshold%s", finalMedian, finalReveals, evalPassThreshold, escalationNote)) + if len(status.Claims) > 0 && status.Claims[0].Phase == bountyPhaseSubmitted { + status.Claims[0].Phase = bountyPhaseRejected + } + } + + // 6. Settlement side-effects, once per bounty: pay the evaluators + // (win-or-lose — they did the work), ground reveals against the chain, + // and record the cross-bounty ladder. Grounding runs BEFORE ladder + // bookkeeping (recordLadder reads Grounded) and never changes the verdict. 
+ c.settleEvalBudget(ctx, sb, status) + c.settleEscalationBudget(ctx, sb, status) + if !status.LadderRecorded { + c.groundEvaluations(ctx, sb, status, status.Evaluations) + if status.Escalation != nil { + c.groundEvaluations(ctx, sb, status, status.Escalation.Evaluations) + } + err := c.recordLadder(ctx, sb, status) + if err == nil && status.Escalation != nil && len(status.Escalation.Evaluations) > 0 { + // Ladder bookkeeping covers round-1 participants too, graded + // against the round-1 median (already banded above). + roundOne := *status + roundOne.Evaluations = status.Escalation.Evaluations + err = c.recordLadder(ctx, sb, &roundOne) + } + if err != nil { + log.Printf("serviceoffer-controller: record evaluator ladder for %s/%s: %v", sb.Namespace, sb.Name, err) + } else { + status.LadderRecorded = true + } + } + return requeue +} + +// reserveEvalBudget holds the poster-funded OBOL eval budget (k × perEvaluator, +// minus the newcomer discount when a probation seat is sitting) at the escrow +// gateway under -eval. Errors are non-fatal: evaluation proceeds and the +// reserve retries on the next reconcile. An AwaitingVoucher hold re-reserves +// each reconcile (idempotent) until the obol.org/eval-voucher annotation +// ferries the poster's Permit2 voucher in. 
+func (c *Controller) reserveEvalBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, annotations map[string]string, status *monetizeapi.ServiceBountyStatus) { + if sb.Spec.Eval.Payment.PerEvaluator == "" { + return + } + if status.EvalBudgetState != "" && status.EvalBudgetState != escrowStateAwaitingVoucher { + return + } + total := evalBudgetTotal(sb, status) + if total == "" { + return + } + receipt, err := c.escrowGateway().Reserve(ctx, escrow.ReserveRequest{ + ID: string(sb.UID) + "-eval", + Network: sb.Spec.Reward.Network, + PayTo: sb.Spec.Reward.PayTo, // poster refund address + Asset: sb.Spec.Eval.Payment.Asset, + Amount: total, + Scheme: sb.Spec.Reward.Escrow.Scheme, + Voucher: voucherFromAnnotations(annotations, bountyEvalVoucherAnnotation), + }) + if err != nil { + log.Printf("serviceoffer-controller: reserve eval budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + status.EvalBudgetState = receipt.State + ferryEscrowSpender(status, receipt) +} + +// evalSeatAmounts resolves the per-evaluator eval price into the full and +// probation-half per-seat amount strings used for CaptureBatch recipients. +// When the asset resolves in the token registry the amounts are ATOMIC token +// units — escrow.BuildTransferDetails matches capture recipients against the +// poster's Permit2 voucher seats with exact integer comparison, and the CLI +// (cmd/obol bountyEvalFundRecipients) signs perAtomic / floor(perAtomic/2). +// An unresolvable asset falls back to human-unit strings: the dev ledger +// gateway treats amounts as opaque bookkeeping, and a real facilitator could +// never have verified a voucher for a token the CLI cannot resolve either. 
+func evalSeatAmounts(sb *monetizeapi.ServiceBounty) (full, half string, ok bool) { + per := strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator) + perFloat, err := strconv.ParseFloat(per, 64) + if err != nil || perFloat <= 0 { + return "", "", false + } + full = strconv.FormatFloat(perFloat, 'f', 2, 64) + half = strconv.FormatFloat(perFloat/2, 'f', 2, 64) + entry, found := x402.ResolveToken(sb.Spec.Eval.Payment.Asset, sb.Spec.Reward.Network) + if !found { + return full, half, true + } + atomicStr, err := escrow.HumanToAtomic(per, entry.Decimals) + if err != nil { + return full, half, true + } + perAtomic, parsed := new(big.Int).SetString(atomicStr, 10) + if !parsed { + return full, half, true + } + return perAtomic.String(), new(big.Int).Div(perAtomic, big.NewInt(2)).String(), true +} + +// settleEvalBudget batch-settles the held eval budget to every counting +// evaluator with a valid reveal (probation seats at half price — the discount +// already went to the poster at reserve time). Shadows evaluate free; non/bad +// reveals earn nothing (the monetary edge of the non-reveal penalty). 
+func (c *Controller) settleEvalBudget(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + if status.EvalBudgetState != escrow.StateReserved { + return + } + fullAmount, halfAmount, ok := evalSeatAmounts(sb) + if !ok { + return + } + + var recipients []escrow.BatchRecipient + paid := make(map[string]bool) + k := evalQuorumK(sb) + for i := range status.Evaluations { + evaluation := &status.Evaluations[i] + if evaluation.Phase != evalPhaseRevealed || evaluation.Seat == monetizeapi.PanelSeatShadow { + continue + } + if int64(len(recipients)) >= k { + break // open-door can over-subscribe; the budget pays k seats + } + amount := fullAmount + if evaluation.Seat == monetizeapi.PanelSeatProbation { + amount = halfAmount + } + recipients = append(recipients, escrow.BatchRecipient{ + Address: evaluation.Address, + Amount: amount, + }) + paid[evaluation.Address] = true + } + if len(recipients) == 0 { + return // nothing to pay; refund path voids the budget + } + + var receipt escrow.Receipt + var err error + if batch, ok := c.escrowGateway().(escrow.BatchGateway); ok { + receipt, err = batch.CaptureBatch(ctx, string(sb.UID)+"-eval", recipients) + } else { + receipt, err = c.escrowGateway().Capture(ctx, string(sb.UID)+"-eval") + } + if err != nil { + log.Printf("serviceoffer-controller: settle eval budget for %s/%s: %v", sb.Namespace, sb.Name, err) + return + } + status.EvalBudgetState = receipt.State + status.EvalPayoutTxHash = receipt.TxHash + ferryEscrowSpender(status, receipt) + for i := range status.Evaluations { + if paid[status.Evaluations[i].Address] { + status.Evaluations[i].Paid = true + } + } +} + +// evalBudgetTotal computes k × perEvaluator with the probation seat at half +// price (the newcomer discount is passed to the poster). 
+func evalBudgetTotal(sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) string { + per, err := strconv.ParseFloat(strings.TrimSpace(sb.Spec.Eval.Payment.PerEvaluator), 64) + if err != nil || per <= 0 { + return "" + } + total := float64(evalQuorumK(sb)) * per + for _, seat := range status.EvaluatorPanel { + if seat.Seat == monetizeapi.PanelSeatProbation { + total -= per / 2 + break + } + } + return strconv.FormatFloat(total, 'f', 2, 64) +} + +// evalQuorumK is spec.eval.k floored at 1 (the median of one is that one). +func evalQuorumK(sb *monetizeapi.ServiceBounty) int64 { + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + return k +} + +// countingScores collects the revealed scores of counting (non-shadow) seats. +func countingScores(evaluations []monetizeapi.ServiceBountyEvaluation) []int64 { + var scores []int64 + for _, evaluation := range evaluations { + if evaluation.Phase == evalPhaseRevealed && evaluation.Seat != monetizeapi.PanelSeatShadow { + scores = append(scores, evaluation.Score) + } + } + return scores +} + +// markOutlierBands grades every evaluation's divergence from the median: +// revealed scores within evalOutlierBand are in band; non/bad reveals are +// worst-case outliers by definition. +func markOutlierBands(evaluations []monetizeapi.ServiceBountyEvaluation, median int64) { + for i := range evaluations { + evaluation := &evaluations[i] + switch evaluation.Phase { + case evalPhaseRevealed: + diff := evaluation.Score - median + if diff < 0 { + diff = -diff + } + evaluation.WithinBand = diff <= evalOutlierBand + default: + evaluation.WithinBand = false + } + } +} + +func findEvaluation(evaluations []monetizeapi.ServiceBountyEvaluation, address string) *monetizeapi.ServiceBountyEvaluation { + for i := range evaluations { + if evaluations[i].Address == address { + return &evaluations[i] + } + } + return nil +} + +// revealWindow resolves the task package's ladder.revealWindow. 
+func revealWindow(sb *monetizeapi.ServiceBounty) time.Duration { + t, err := bounty.Resolve(sb.Spec.Task.TypeRef) + if err != nil { + return defaultRevealWindow + } + window, err := time.ParseDuration(t.Eval.Ladder.RevealWindow) + if err != nil || window <= 0 { + return defaultRevealWindow + } + return window +} + +// medianInt64 returns the median (lower-middle average for even counts) — +// robust to one outlier, which is what makes a newcomer seat verdict-safe. +func medianInt64(values []int64) int64 { + sorted := make([]int64, len(values)) + copy(sorted, values) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + mid := len(sorted) / 2 + if len(sorted)%2 == 1 { + return sorted[mid] + } + return (sorted[mid-1] + sorted[mid]) / 2 +} + +// ── voucher ferry helpers ─────────────────────────────────────────────────── + +// voucherFromAnnotations decodes the JSON Permit2 voucher ferried on the given +// annotation. A voucher carries ONLY poster-signed transfer fields +// (escrow.Permit2Voucher); escrow endpoint/credential configuration comes from +// controller env alone (newBountyEscrowGateway) and can never ride in here. +// Malformed payloads are treated as absent — the facilitator keeps the hold in +// AwaitingVoucher until a valid voucher arrives. +func voucherFromAnnotations(annotations map[string]string, key string) *escrow.Permit2Voucher { + raw := strings.TrimSpace(annotations[key]) + if raw == "" { + return nil + } + var voucher escrow.Permit2Voucher + if err := json.Unmarshal([]byte(raw), &voucher); err != nil { + log.Printf("serviceoffer-controller: invalid %s annotation (ignored): %v", key, err) + return nil + } + return &voucher +} + +// ferryEscrowSpender records the FIRST non-empty facilitator spender address +// seen on any escrow receipt into status.escrowSpender, so poster-side signers +// know which executor to bind their Permit2 vouchers to. 
+func ferryEscrowSpender(status *monetizeapi.ServiceBountyStatus, receipt escrow.Receipt) { + if status.EscrowSpender == "" && receipt.Spender != "" { + status.EscrowSpender = receipt.Spender + } +} + +// isEscrowVoucherRefusal classifies a facilitator capture refusal caused by a +// missing/expired voucher (HTTPGateway surfaces the response body inside the +// error text). Such refusals park as a condition + requeue — a poster-side +// signing gap must never fail the reconcile loop. Only the facilitator's 409 +// awaiting-voucher refusal parks: a 400 seat-mismatch (recipients not in the +// stored voucher) must surface as a capture failure, not loop as "awaiting". +func isEscrowVoucherRefusal(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "facilitator returned 409") && strings.Contains(msg, "voucher") +} + +func maxDuration(a, b time.Duration) time.Duration { + if a > b { + return a + } + return b +} diff --git a/internal/serviceoffercontroller/bounty_eval_test.go b/internal/serviceoffercontroller/bounty_eval_test.go new file mode 100644 index 00000000..485c4c21 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_eval_test.go @@ -0,0 +1,342 @@ +package serviceoffercontroller + +import ( + "fmt" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" +) + +const ( + evalA = "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + evalB = "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + evalC = "0xcccccccccccccccccccccccccccccccccccccccc" +) + +// testEvalBounty is a quorum-verified (mode=required, rerun-tolerance) bounty +// with k=3, claimed and submitted. 
+func testEvalBounty(name string) *monetizeapi.ServiceBounty { + sb := testBounty(name) + sb.Spec.Acceptance.Method = "rerun-tolerance" + sb.Spec.Eval = monetizeapi.ServiceBountyEval{ + K: 3, + Mode: monetizeapi.EvalModeRequired, + Payment: monetizeapi.ServiceBountyEvalPayment{ + Asset: "OBOL", PerEvaluator: "2.00", FundedBy: "poster", Settle: "batch-settlement", + }, + } + return sb +} + +func claimAndSubmit(t *testing.T, c *Controller, ns, name string) { + t.Helper() + key := ns + "/" + name + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/claim": "0x2222222222222222222222222222222222222222", + }) + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/submit": `{"resultHash":"0xbeef","reportURI":"file:///r.json"}`, + }) + reconcileBountyUntilSettled(t, c, key) +} + +func commitAndReveal(t *testing.T, c *Controller, ns, name string, scores map[string]int64) { + t.Helper() + key := ns + "/" + name + // Commit phase: all evaluators commit before anyone reveals. + for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) + // Reveal phase. 
+ for addr, score := range scores { + annotateBounty(t, c, ns, name, map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, key) +} + +func TestEvalMarket_QuorumPassToPaid(t *testing.T) { + sb := testEvalBounty("quorum-pass") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "quorum-pass") + commitAndReveal(t, c, ns, "quorum-pass", map[string]int64{evalA: 90, evalB: 85, evalC: 40}) + + got := getBounty(t, c, ns, "quorum-pass") + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "EvaluatorQuorum" { + t.Fatalf("Verified reason = %q, want EvaluatorQuorum", reason) + } + if !bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("median 85 >= 50 must verify") + } + if got.Status.WeightedScore != 85 { + t.Errorf("WeightedScore = %d, want median 85", got.Status.WeightedScore) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid (quorum verdict releases the escrow)", got.Status.Phase) + } + // The 40 is >20 from the median 85 → out of band; the others in band. 
+ for _, ev := range got.Status.Evaluations { + wantBand := ev.Score >= 65 + if ev.WithinBand != wantBand { + t.Errorf("evaluator %s score %d withinBand = %v, want %v", ev.Address, ev.Score, ev.WithinBand, wantBand) + } + } +} + +func TestEvalMarket_QuorumRejects(t *testing.T) { + sb := testEvalBounty("quorum-reject") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "quorum-reject") + commitAndReveal(t, c, ns, "quorum-reject", map[string]int64{evalA: 10, evalB: 20, evalC: 90}) + + got := getBounty(t, c, ns, "quorum-reject") + if bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("median 20 < 50 must not verify") + } + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "EvaluatorQuorum" { + t.Fatalf("Verified reason = %q, want EvaluatorQuorum", reason) + } + if got.Status.Phase != bountyPhaseRejected { + t.Errorf("phase = %q, want Rejected", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("rejected bounty must not pay") + } +} + +// The Kleros address-binding steal: evaluator C copies B's commitment hash, +// then replays B's revealed {score, salt}. The hash binds B's address, so C's +// reveal cannot verify — C grades BadReveal and is excluded from the median. +func TestEvalMarket_CommitBoundToAddress(t *testing.T) { + sb := testEvalBounty("copycat") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "copycat") + + honest := map[string]int64{evalA: 80, evalB: 75} + for addr, score := range honest { + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + // C copies B's commitment verbatim. 
+ annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-commit-" + evalC: monetizeapi.EvalCommitHash(75, "salt-"+evalB, evalB), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/copycat") + + for addr, score := range honest { + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + // C replays B's reveal. + annotateBounty(t, c, ns, "copycat", map[string]string{ + "obol.org/eval-reveal-" + evalC: fmt.Sprintf(`{"score":75,"salt":"salt-%s"}`, evalB), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/copycat") + + got := getBounty(t, c, ns, "copycat") + var copycat *monetizeapi.ServiceBountyEvaluation + for i := range got.Status.Evaluations { + if strings.EqualFold(got.Status.Evaluations[i].Address, evalC) { + copycat = &got.Status.Evaluations[i] + } + } + if copycat == nil { + t.Fatal("copycat evaluation not found") + } + if copycat.Phase != evalPhaseBadReveal { + t.Fatalf("copycat phase = %q, want BadReveal (commitment is address-bound)", copycat.Phase) + } + if !bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Error("honest median (80,75 → 77) must still verify") + } +} + +// Reveals posted before K commitments are in must be ignored: every commit +// closes before any reveal opens. 
+func TestEvalMarket_RevealBeforeWindowIgnored(t *testing.T) { + sb := testEvalBounty("early-reveal") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "early-reveal") + annotateBounty(t, c, ns, "early-reveal", map[string]string{ + "obol.org/eval-commit-" + evalA: monetizeapi.EvalCommitHash(90, "salt-"+evalA, evalA), + "obol.org/eval-reveal-" + evalA: fmt.Sprintf(`{"score":90,"salt":"salt-%s"}`, evalA), + }) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/early-reveal") + + got := getBounty(t, c, ns, "early-reveal") + if got.Status.RevealDeadline != nil { + t.Fatal("reveal window must not open before k=3 commitments") + } + for _, ev := range got.Status.Evaluations { + if ev.Phase != evalPhaseCommitted { + t.Errorf("evaluation %s phase = %q, want Committed (reveal ignored before the window opens)", ev.Address, ev.Phase) + } + } + if bountyConditionIsTrue(got.Status.Conditions, "Verified") { + t.Fatal("no quorum yet") + } +} + +func TestEvalMarket_SelfBondReturnedOnPass(t *testing.T) { + sb := testEvalBounty("bonded-pass") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "bonded-pass") + if got := getBounty(t, c, ns, "bonded-pass"); got.Status.BondState != escrow.StateReserved { + t.Fatalf("bond state after claim = %q, want Reserved", got.Status.BondState) + } + + commitAndReveal(t, c, ns, "bonded-pass", map[string]int64{evalA: 90, evalB: 85, evalC: 80}) + got := getBounty(t, c, ns, "bonded-pass") + if got.Status.BondState != "Returned" { + t.Errorf("bond state = %q, want Returned (accepted work returns the bond)", got.Status.BondState) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid", got.Status.Phase) + } +} + +func TestEvalMarket_SelfBondForfeitedOnReject(t *testing.T) { + sb := testEvalBounty("bonded-reject") + 
sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "bonded-reject") + commitAndReveal(t, c, ns, "bonded-reject", map[string]int64{evalA: 10, evalB: 15, evalC: 20}) + + got := getBounty(t, c, ns, "bonded-reject") + if got.Status.BondState != "Forfeited" { + t.Errorf("bond state = %q, want Forfeited (rejected work forfeits the bond)", got.Status.BondState) + } + if got.Status.Phase != bountyPhaseRejected { + t.Errorf("phase = %q, want Rejected", got.Status.Phase) + } +} + +// Poster override on top of an active eval market: an explicit accept verdict +// wins even before the quorum settles. +func TestEvalMarket_PosterOverrideStillWins(t *testing.T) { + sb := testEvalBounty("override") + c := newBountyTestController(t, sb) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "override") + annotateBounty(t, c, ns, "override", map[string]string{"obol.org/verdict": "accept"}) + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/override") + + got := getBounty(t, c, ns, "override") + if reason := conditionReason(got.Status.Conditions, "Verified"); reason != "PosterOverride" { + t.Fatalf("Verified reason = %q, want PosterOverride", reason) + } + if got.Status.Phase != bountyPhasePaid { + t.Errorf("phase = %q, want Paid", got.Status.Phase) + } +} + +func TestMedianInt64(t *testing.T) { + cases := []struct { + in []int64 + want int64 + }{ + {[]int64{90}, 90}, + {[]int64{90, 40}, 65}, + {[]int64{90, 85, 40}, 85}, + {[]int64{1, 2, 3, 100}, 2}, + } + for _, tc := range cases { + if got := medianInt64(tc.in); got != tc.want { + t.Errorf("median(%v) = %d, want %d", tc.in, got, tc.want) + } + } +} + +// ── eval payment units: capture recipients must match the voucher seats ──── + +// The poster's Permit2 voucher seats are signed in ATOMIC token units +// (cmd/obol bountyEvalFundRecipients: perAtomic, probation 
floor(perAtomic/2)); +// escrow.BuildTransferDetails matches CaptureBatch recipients against those +// seats with exact integer comparison. The controller's settle paths must +// therefore speak atomic units whenever the asset resolves in the token +// registry — a human-unit "2.00" recipient would 4xx every real capture. +func TestEvalSeatAmounts_AtomicMatchesVoucherSeatMath(t *testing.T) { + sb := testEvalBounty("atomic-units") // Asset OBOL, PerEvaluator 2.00 + sb.Spec.Reward.Network = "base-sepolia" + + full, half, ok := evalSeatAmounts(sb) + if !ok { + t.Fatal("evalSeatAmounts must resolve a positive perEvaluator price") + } + wantFull, err := escrow.HumanToAtomic("2.00", 18) // OBOL is 18 decimals on base-sepolia + if err != nil { + t.Fatalf("HumanToAtomic: %v", err) + } + if full != wantFull || full != "2000000000000000000" { + t.Fatalf("full seat = %q, want atomic %q", full, wantFull) + } + if half != "1000000000000000000" { + t.Fatalf("probation seat = %q, want floor(perAtomic/2) = 1000000000000000000", half) + } + + // An asset/network pair outside the token registry (OBOL is not + // registered on base mainnet) falls back to human-unit bookkeeping + // strings — the dev ledger gateway treats amounts as opaque, and no + // CLI-signed voucher can exist for an unresolvable token anyway. 
+ sb.Spec.Reward.Network = "base" + full, half, ok = evalSeatAmounts(sb) + if !ok || full != "2.00" || half != "1.00" { + t.Fatalf("unresolvable token fallback = (%q, %q, %v), want (2.00, 1.00, true)", full, half, ok) + } + + sb.Spec.Eval.Payment.PerEvaluator = "not-a-number" + if _, _, ok := evalSeatAmounts(sb); ok { + t.Fatal("a non-numeric perEvaluator price must not settle") + } +} + +func TestEvalSettle_CaptureRecipientsAreAtomic(t *testing.T) { + sb := testEvalBounty("atomic-settle") + sb.Spec.Reward.Network = "base-sepolia" // OBOL resolves → atomic units + c := newBountyTestController(t, sb) + fake := newFakeEscrow() + c.bountyEscrow = fake + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "atomic-settle") + // All in band (median 85) — no escalation, straight to settle. + commitAndReveal(t, c, ns, "atomic-settle", map[string]int64{evalA: 90, evalB: 85, evalC: 80}) + + got := getBounty(t, c, ns, "atomic-settle") + if got.Status.EvalBudgetState != escrow.StateCaptured { + t.Fatalf("eval budget = %q, want Captured", got.Status.EvalBudgetState) + } + recipients := fake.batches["uid-atomic-settle-eval"] + if len(recipients) != 3 { + t.Fatalf("capture recipients = %d, want 3", len(recipients)) + } + for _, r := range recipients { + if r.Amount != "2000000000000000000" { + t.Fatalf("recipient %s amount = %q, want atomic 2000000000000000000 (matches the CLI voucher seat)", r.Address, r.Amount) + } + } +} diff --git a/internal/serviceoffercontroller/bounty_grounding.go b/internal/serviceoffercontroller/bounty_grounding.go new file mode 100644 index 00000000..9a62f51c --- /dev/null +++ b/internal/serviceoffercontroller/bounty_grounding.go @@ -0,0 +1,122 @@ +package serviceoffercontroller + +// Grounding: an annotation-level reveal that carries a validationTx claims an +// on-chain ERC-8004 validationResponse backs it. 
// The controller READS the
// Validation Registry on the bounty's payment network (per-network client via
// eRPC, ERC8004_RPC_BASE — the registration watcher pattern) and marks the
// evaluation Grounded only when the on-chain responder is the evaluator AND
// the on-chain response equals the revealed score, for the request hash
// erc8004.BountyEvalRequestHash(bountyUID, evaluator).
//
// Grounding is an ADVISORY reputation signal: chain unreachable, no entry, or
// a mismatch leaves Grounded=false with a condition explaining why — it never
// blocks, delays, or changes the quorum verdict. The controller still signs
// nothing; the validationResponse tx was submitted by the evaluator's own
// wallet.

import (
	"context"
	"fmt"
	"strings"

	"github.com/ObolNetwork/obol-stack/internal/erc8004"
	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
	"github.com/ethereum/go-ethereum/common"
)

// bountyValidationReader is the narrow chain-read seam grounding needs.
type bountyValidationReader interface {
	// ValidationStatus returns the registry's entry for requestHash.
	// groundEvaluations treats a result whose ValidatorAddress is the zero
	// address as "no on-chain validation entry", and otherwise compares the
	// entry's ValidatorAddress and Response against the revealed evaluation.
	ValidationStatus(ctx context.Context, requestHash common.Hash) (erc8004.ValidationStatus, error)
}

// bountyValidationReaderFactory dials a read-only ERC-8004 Validation Registry
// reader for the given network. It is a package seam (the grounding twin of
// the bountyEscrow fake) swapped by tests to inject a fake chain; it cannot be
// a Controller field without editing controller.go, which a parallel lane
// owns. The returned func() releases the underlying RPC client.
+var bountyValidationReaderFactory = func(ctx context.Context, rpcBase, network string) (bountyValidationReader, func(), error) { + net, err := erc8004.ResolveNetwork(network) + if err != nil { + return nil, nil, err + } + registry, err := erc8004.ValidationRegistryAddress(network) + if err != nil { + return nil, nil, err + } + client, err := erc8004.NewClientForNetwork(ctx, rpcBase, net) + if err != nil { + return nil, nil, err + } + reader, err := erc8004.NewValidationReader(client.ETH(), registry) + if err != nil { + client.Close() + return nil, nil, err + } + return reader, client.Close, nil +} + +// groundEvaluations sets Grounded on every revealed evaluation in the slice +// whose validationTx claim is backed by a matching on-chain validation entry. +// It runs BEFORE ladder bookkeeping (recordLadder reads Grounded) and mutates +// only the Grounded flags + the EvalGrounded condition — never the verdict. +func (c *Controller) groundEvaluations(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus, evaluations []monetizeapi.ServiceBountyEvaluation) { + var pending []int + for i := range evaluations { + if evaluations[i].Phase == evalPhaseRevealed && + strings.TrimSpace(evaluations[i].ValidationTxHash) != "" && + !evaluations[i].Grounded { + pending = append(pending, i) + } + } + if len(pending) == 0 { + return // nothing claims chain backing — touch no condition, dial nothing + } + + network := sb.Spec.Reward.Network + if _, err := erc8004.ValidationRegistryAddress(network); err != nil { + setPurchaseCondition(&status.Conditions, "EvalGrounded", "False", "RegistryUnavailable", + truncateMessage(fmt.Sprintf("no validation registry for network %q: %v", network, err))) + return + } + + rpcBase := c.registrationRPCBase + if rpcBase == "" { + rpcBase = erc8004.DefaultRPCBase + } + reader, closeReader, err := bountyValidationReaderFactory(ctx, rpcBase, network) + if err != nil { + setPurchaseCondition(&status.Conditions, 
"EvalGrounded", "False", "ChainUnreachable", + truncateMessage(fmt.Sprintf("validation registry on %s unreachable: %v", network, err))) + return + } + defer closeReader() + + grounded := 0 + var problems []string + for _, i := range pending { + evaluation := &evaluations[i] + requestHash := erc8004.BountyEvalRequestHash(string(sb.UID), evaluation.Address) + onchain, err := reader.ValidationStatus(ctx, requestHash) + switch { + case err != nil: + problems = append(problems, fmt.Sprintf("%s: chain read failed: %v", evaluation.Address, err)) + case onchain.ValidatorAddress == (common.Address{}): + problems = append(problems, fmt.Sprintf("%s: no on-chain validation entry", evaluation.Address)) + case onchain.ValidatorAddress != common.HexToAddress(evaluation.Address): + problems = append(problems, fmt.Sprintf("%s: on-chain responder %s is not the evaluator", evaluation.Address, onchain.ValidatorAddress.Hex())) + case int64(onchain.Response) != evaluation.Score: + problems = append(problems, fmt.Sprintf("%s: on-chain response %d != revealed score %d", evaluation.Address, onchain.Response, evaluation.Score)) + default: + evaluation.Grounded = true + grounded++ + } + } + + if len(problems) == 0 { + setPurchaseCondition(&status.Conditions, "EvalGrounded", "True", "Grounded", + fmt.Sprintf("%d evaluation(s) grounded by on-chain ERC-8004 validation entries", grounded)) + } else { + setPurchaseCondition(&status.Conditions, "EvalGrounded", "False", "NotGrounded", + truncateMessage(strings.Join(problems, "; "))) + } +} diff --git a/internal/serviceoffercontroller/bounty_lifecycle_test.go b/internal/serviceoffercontroller/bounty_lifecycle_test.go new file mode 100644 index 00000000..c6bda966 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_lifecycle_test.go @@ -0,0 +1,540 @@ +package serviceoffercontroller + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + 
"github.com/ObolNetwork/obol-stack/internal/x402/escrow" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/util/workqueue" +) + +func newBountyTestController(t *testing.T, bounties ...*monetizeapi.ServiceBounty) *Controller { + t.Helper() + + objects := make([]runtime.Object, 0, len(bounties)) + for _, sb := range bounties { + objects = append(objects, mustBountyObject(t, sb)) + } + + dynClient := fake.NewSimpleDynamicClientWithCustomListKinds( + runtime.NewScheme(), + map[schema.GroupVersionResource]string{ + monetizeapi.ServiceBountyGVR: "ServiceBountyList", + monetizeapi.EvaluatorEnrollmentGVR: "EvaluatorEnrollmentList", + }, + objects..., + ) + + return &Controller{ + dynClient: dynClient, + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: escrow.NewLedgerGateway(), + } +} + +func mustBountyObject(t *testing.T, sb *monetizeapi.ServiceBounty) *unstructured.Unstructured { + t.Helper() + + sb.TypeMeta = metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.ServiceBountyKind, + } + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(sb) + if err != nil { + t.Fatalf("to unstructured: %v", err) + } + return &unstructured.Unstructured{Object: obj} +} + +func testBounty(name string) *monetizeapi.ServiceBounty { + return &monetizeapi.ServiceBounty{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "hermes-obol-agent", + UID: types.UID("uid-" + name), + }, + Spec: monetizeapi.ServiceBountySpec{ + Task: monetizeapi.ServiceBountyTask{ + TypeRef: "benchmark@v1", + Params: map[string]string{"dtype": "fp16"}, + }, + Acceptance: monetizeapi.ServiceBountyAcceptance{Method: "poster-manual"}, + Reward: 
monetizeapi.ServiceBountyReward{ + Network: "base", + PayTo: "0x1111111111111111111111111111111111111111", + Asset: monetizeapi.ServiceOfferAsset{Symbol: "USDC"}, + Amount: "500.00", + Escrow: monetizeapi.ServiceBountyEscrow{Scheme: "upto"}, + }, + MaxFulfillers: 1, + }, + } +} + +// reconcileBountyUntilSettled runs reconcile twice: the first pass may only +// add the finalizer (it returns early, the informer event re-queues in prod). +func reconcileBountyUntilSettled(t *testing.T, c *Controller, key string) { + t.Helper() + for i := 0; i < 2; i++ { + if err := c.reconcileBounty(context.Background(), key); err != nil { + t.Fatalf("reconcile %s (pass %d): %v", key, i, err) + } + } +} + +func getBounty(t *testing.T, c *Controller, namespace, name string) *monetizeapi.ServiceBounty { + t.Helper() + + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get bounty %s/%s: %v", namespace, name, err) + } + var sb monetizeapi.ServiceBounty + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &sb); err != nil { + t.Fatalf("decode bounty: %v", err) + } + return &sb +} + +func annotateBounty(t *testing.T, c *Controller, namespace, name string, annotations map[string]string) { + t.Helper() + + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get bounty for annotate: %v", err) + } + existing := raw.GetAnnotations() + if existing == nil { + existing = map[string]string{} + } + for k, v := range annotations { + existing[k] = v + } + raw.SetAnnotations(existing) + if _, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(namespace).Update(context.Background(), raw, metav1.UpdateOptions{}); err != nil { + t.Fatalf("annotate bounty: %v", err) + } +} + +func TestBountyLifecycle_OpenToPaid(t *testing.T) { + c := 
newBountyTestController(t, testBounty("bench")) + key := "hermes-obol-agent/bench" + + // Open: finalizer + task validation + escrow reserve. + reconcileBountyUntilSettled(t, c, key) + sb := getBounty(t, c, "hermes-obol-agent", "bench") + if !bountyConditionIsTrue(sb.Status.Conditions, "TaskValid") { + t.Fatalf("TaskValid not true: %+v", sb.Status.Conditions) + } + if !bountyConditionIsTrue(sb.Status.Conditions, "EscrowReserved") { + t.Fatalf("EscrowReserved not true: %+v", sb.Status.Conditions) + } + if sb.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q, want Reserved", sb.Status.EscrowState) + } + if sb.Status.Phase != bountyPhaseOpen { + t.Fatalf("phase = %q, want Open", sb.Status.Phase) + } + + // Claim. + annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountyClaimAnnotation: "0x2222222222222222222222222222222222222222", + bountyCommitAnnotation: "0xc0ffee", + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if sb.Status.Phase != bountyPhaseClaimed { + t.Fatalf("phase = %q, want Claimed", sb.Status.Phase) + } + if len(sb.Status.Claims) != 1 || sb.Status.Claims[0].CommitHash != "0xc0ffee" { + t.Fatalf("claims = %+v", sb.Status.Claims) + } + + // Submit. + annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountySubmitAnnotation: `{"resultHash":"0xbeef","reportURI":"http://hermes.local/results/bench.a2ui.json"}`, + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if sb.Status.Phase != bountyPhaseSubmitted { + t.Fatalf("phase = %q, want Submitted", sb.Status.Phase) + } + if sb.Status.ReportURI == "" { + t.Fatal("ReportURI not promoted from submission") + } + + // Poster accepts → Verified + Paid (ledger capture). 
+ annotateBounty(t, c, "hermes-obol-agent", "bench", map[string]string{ + bountyVerdictAnnotation: "accept", + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, "hermes-obol-agent", "bench") + if !bountyConditionIsTrue(sb.Status.Conditions, "Verified") { + t.Fatalf("Verified not true: %+v", sb.Status.Conditions) + } + if !bountyConditionIsTrue(sb.Status.Conditions, "Paid") { + t.Fatalf("Paid not true: %+v", sb.Status.Conditions) + } + if sb.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid", sb.Status.Phase) + } + if sb.Status.WeightedScore != 100 { + t.Fatalf("weightedScore = %d, want 100", sb.Status.WeightedScore) + } + if !strings.HasPrefix(sb.Status.CaptureTxHash, "dev-ledger:") { + t.Fatalf("CaptureTxHash = %q, want dev-ledger label (never mistakable for settlement)", sb.Status.CaptureTxHash) + } + if len(sb.Status.Claims) != 1 || sb.Status.Claims[0].Phase != bountyPhasePaid { + t.Fatalf("claim phase = %+v, want Paid", sb.Status.Claims) + } +} + +func TestBountyLifecycle_InvalidTaskParks(t *testing.T) { + sb := testBounty("bad") + sb.Spec.Task.TypeRef = "does-not-exist@v9" + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/bad") + got := getBounty(t, c, "hermes-obol-agent", "bad") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "TaskValid") { + t.Fatal("TaskValid should be false for unknown typeRef") + } +} + +func TestBountyLifecycle_BadParamEnumParks(t *testing.T) { + sb := testBounty("bad-param") + sb.Spec.Task.Params = map[string]string{"dtype": "fp64"} + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/bad-param") + got := getBounty(t, c, "hermes-obol-agent", "bad-param") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid", got.Status.Phase) + } +} + +func TestBountyLifecycle_UnknownParamParks(t 
*testing.T) { + sb := testBounty("typo-param") + sb.Spec.Task.Params = map[string]string{"hardwreClass": "H100"} + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/typo-param") + got := getBounty(t, c, "hermes-obol-agent", "typo-param") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid (unknown params are typo'd intent, not extensibility)", got.Status.Phase) + } +} + +func TestBountyLifecycle_MultiFulfillerParks(t *testing.T) { + sb := testBounty("multi") + sb.Spec.MaxFulfillers = 3 + c := newBountyTestController(t, sb) + + reconcileBountyUntilSettled(t, c, "hermes-obol-agent/multi") + got := getBounty(t, c, "hermes-obol-agent", "multi") + if got.Status.Phase != bountyPhaseInvalid { + t.Fatalf("phase = %q, want Invalid (v1 is single-winner; silently honoring >1 promises a race semantic that doesn't exist)", got.Status.Phase) + } +} + +func TestBountyLifecycle_DeadlineRefunds(t *testing.T) { + sb := testBounty("late") + past := metav1.NewTime(time.Now().Add(-time.Hour)) + sb.Spec.Deadline = &past + c := newBountyTestController(t, sb) + key := "hermes-obol-agent/late" + + // First pass adds the finalizer; the next passes reserve then refund. 
+ for i := 0; i < 3; i++ { + if err := c.reconcileBounty(context.Background(), key); err != nil { + t.Fatalf("reconcile pass %d: %v", i, err) + } + } + got := getBounty(t, c, "hermes-obol-agent", "late") + if got.Status.Phase != bountyPhaseExpired && got.Status.Phase != bountyPhaseRefunded { + t.Fatalf("phase = %q, want Expired or Refunded", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("expired bounty must not pay") + } +} + +func TestBountyLifecycle_RejectVerdict(t *testing.T) { + c := newBountyTestController(t, testBounty("rejected")) + key := "hermes-obol-agent/rejected" + + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, "hermes-obol-agent", "rejected", map[string]string{ + bountyClaimAnnotation: "0x3333333333333333333333333333333333333333", + bountySubmitAnnotation: `{"resultHash":"0x1","reportURI":"http://x"}`, + bountyVerdictAnnotation: "reject:scores out of tolerance", + }) + reconcileBountyUntilSettled(t, c, key) + + got := getBounty(t, c, "hermes-obol-agent", "rejected") + if got.Status.Phase != bountyPhaseRejected { + t.Fatalf("phase = %q, want Rejected", got.Status.Phase) + } + if bountyConditionIsTrue(got.Status.Conditions, "Paid") { + t.Fatal("rejected bounty must not pay") + } + if got.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q; rejection keeps the hold until deadline refund or poster delete", got.Status.EscrowState) + } +} + +func TestBountyLifecycle_InvalidClaimAddress(t *testing.T) { + c := newBountyTestController(t, testBounty("badclaim")) + key := "hermes-obol-agent/badclaim" + + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, "hermes-obol-agent", "badclaim", map[string]string{ + bountyClaimAnnotation: "not-an-address", + }) + reconcileBountyUntilSettled(t, c, key) + + got := getBounty(t, c, "hermes-obol-agent", "badclaim") + if len(got.Status.Claims) != 0 { + t.Fatalf("claims = %+v, want none for invalid address", got.Status.Claims) + } + 
if got.Status.Phase != bountyPhaseOpen { + t.Fatalf("phase = %q, want Open", got.Status.Phase) + } +} + +// ── voucher ferry (Permit2 vouchers ride annotations into ReserveRequests) ── + +func TestBountyLifecycle_RewardVoucherFerry(t *testing.T) { + fake := newFakeEscrow() + fake.spender = "0xFAC0000000000000000000000000000000000FAC" + fake.requireVoucher["uid-ferry"] = true + c := newBountyTestController(t, testBounty("ferry")) + c.bountyEscrow = fake + ns := "hermes-obol-agent" + key := ns + "/ferry" + + // No voucher yet: the hold parks in AwaitingVoucher — surfaced as a + // condition, never a reconcile error — and the facilitator's spender is + // ferried into status for the poster-side signer. + reconcileBountyUntilSettled(t, c, key) + sb := getBounty(t, c, ns, "ferry") + if sb.Status.EscrowState != escrowStateAwaitingVoucher { + t.Fatalf("EscrowState = %q, want AwaitingVoucher", sb.Status.EscrowState) + } + if reason := conditionReason(sb.Status.Conditions, "EscrowReserved"); reason != "EscrowAwaitingVoucher" { + t.Fatalf("EscrowReserved reason = %q, want EscrowAwaitingVoucher", reason) + } + if sb.Status.EscrowSpender != fake.spender { + t.Fatalf("EscrowSpender = %q, want %q ferried from the receipt", sb.Status.EscrowSpender, fake.spender) + } + + // The signed voucher ferries in → re-reserve picks it up → Reserved. 
+ annotateBounty(t, c, ns, "ferry", map[string]string{ + bountyRewardVoucherAnnotation: `{"owner":"0x1111111111111111111111111111111111111111","token":"0x036CbD53842c5426634e7929541eC2318f3dCF7e","network":"base","spender":"0xFAC0000000000000000000000000000000000FAC","nonce":"7","deadline":1893456000,"recipients":[{"address":"0x2222222222222222222222222222222222222222","amount":"500000000"}],"signature":"0xabcd"}`, + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, ns, "ferry") + if sb.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q, want Reserved after the voucher arrived", sb.Status.EscrowState) + } + if !bountyConditionIsTrue(sb.Status.Conditions, "EscrowReserved") { + t.Fatal("EscrowReserved must be true once the voucher-backed hold lands") + } + req := fake.lastReserve(t, "uid-ferry") + if req.Voucher == nil || req.Voucher.Nonce != "7" || len(req.Voucher.Recipients) != 1 { + t.Fatalf("voucher not ferried intact: %+v", req.Voucher) + } + + // Claim → submit → accept → capture: the full transition chain + // AwaitingVoucher → Reserved → Captured. 
+ annotateBounty(t, c, ns, "ferry", map[string]string{ + bountyClaimAnnotation: "0x2222222222222222222222222222222222222222", + }) + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, "ferry", map[string]string{ + bountySubmitAnnotation: `{"resultHash":"0xbeef","reportURI":"http://x"}`, + bountyVerdictAnnotation: "accept", + }) + reconcileBountyUntilSettled(t, c, key) + sb = getBounty(t, c, ns, "ferry") + if sb.Status.EscrowState != escrow.StateCaptured { + t.Fatalf("EscrowState = %q, want Captured", sb.Status.EscrowState) + } + if sb.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid", sb.Status.Phase) + } +} + +func TestBountyLifecycle_BondAndEvalVoucherFerry(t *testing.T) { + fake := newFakeEscrow() + fake.requireVoucher["uid-legs-bond"] = true + fake.requireVoucher["uid-legs-eval"] = true + sb := testEvalBounty("legs") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + c.bountyEscrow = fake + ns := "hermes-obol-agent" + key := ns + "/legs" + + claimAndSubmit(t, c, ns, "legs") + got := getBounty(t, c, ns, "legs") + if got.Status.BondState != escrowStateAwaitingVoucher { + t.Fatalf("BondState = %q, want AwaitingVoucher (parked, not an error)", got.Status.BondState) + } + if got.Status.EvalBudgetState != escrowStateAwaitingVoucher { + t.Fatalf("EvalBudgetState = %q, want AwaitingVoucher", got.Status.EvalBudgetState) + } + + annotateBounty(t, c, ns, "legs", map[string]string{ + bountyBondVoucherAnnotation: `{"owner":"0x2222222222222222222222222222222222222222","token":"0xOB","network":"base","nonce":"1","deadline":1,"signature":"0x01"}`, + bountyEvalVoucherAnnotation: `{"owner":"0x1111111111111111111111111111111111111111","token":"0xOB","network":"base","nonce":"2","deadline":1,"signature":"0x02"}`, + }) + reconcileBountyUntilSettled(t, c, key) + got = getBounty(t, c, ns, "legs") + if got.Status.BondState != escrow.StateReserved { + 
t.Fatalf("BondState = %q, want Reserved after bond voucher", got.Status.BondState) + } + if got.Status.EvalBudgetState != escrow.StateReserved { + t.Fatalf("EvalBudgetState = %q, want Reserved after eval voucher", got.Status.EvalBudgetState) + } + if fake.lastReserve(t, "uid-legs-bond").Voucher.Nonce != "1" { + t.Fatal("bond voucher not attached to the bond reserve") + } + if fake.lastReserve(t, "uid-legs-eval").Voucher.Nonce != "2" { + t.Fatal("eval voucher not attached to the eval-budget reserve") + } +} + +func TestBountyLifecycle_EscrowSpenderFerriedOnce(t *testing.T) { + fake := newFakeEscrow() + fake.spender = "0xFAC0000000000000000000000000000000000001" + sb := testBounty("spender") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{Required: true, Amount: "10.00", Token: "OBOL"} + c := newBountyTestController(t, sb) + c.bountyEscrow = fake + ns := "hermes-obol-agent" + key := ns + "/spender" + + reconcileBountyUntilSettled(t, c, key) + got := getBounty(t, c, ns, "spender") + if got.Status.EscrowSpender != "0xFAC0000000000000000000000000000000000001" { + t.Fatalf("EscrowSpender = %q, want first receipt's spender", got.Status.EscrowSpender) + } + + // A later receipt reporting a different spender must NOT overwrite the + // first — signers bind vouchers to one executor. 
+ fake.mu.Lock() + fake.spender = "0xFAC0000000000000000000000000000000000002" + fake.mu.Unlock() + annotateBounty(t, c, ns, "spender", map[string]string{ + bountyClaimAnnotation: "0x2222222222222222222222222222222222222222", + }) + reconcileBountyUntilSettled(t, c, key) + got = getBounty(t, c, ns, "spender") + if got.Status.EscrowSpender != "0xFAC0000000000000000000000000000000000001" { + t.Fatalf("EscrowSpender = %q, want the FIRST spender preserved", got.Status.EscrowSpender) + } +} + +func TestBountyLifecycle_CaptureVoucherRefusalParksNotFails(t *testing.T) { + fake := newFakeEscrow() + fake.captureErr["uid-refuse"] = fmt.Errorf("escrow capture uid-refuse: facilitator returned 409: AwaitingVoucher: settlement voucher missing") + c := newBountyTestController(t, testBounty("refuse")) + c.bountyEscrow = fake + ns := "hermes-obol-agent" + key := ns + "/refuse" + + reconcileBountyUntilSettled(t, c, key) + annotateBounty(t, c, ns, "refuse", map[string]string{ + bountyClaimAnnotation: "0x2222222222222222222222222222222222222222", + bountySubmitAnnotation: `{"resultHash":"0x1","reportURI":"http://x"}`, + bountyVerdictAnnotation: "accept", + }) + // reconcileBountyUntilSettled fails the test on a reconcile error — a + // voucher-refused capture must park as a condition instead. + reconcileBountyUntilSettled(t, c, key) + + got := getBounty(t, c, ns, "refuse") + if reason := conditionReason(got.Status.Conditions, "Paid"); reason != "EscrowAwaitingVoucher" { + t.Fatalf("Paid reason = %q, want EscrowAwaitingVoucher", reason) + } + if got.Status.EscrowState != escrow.StateReserved { + t.Fatalf("EscrowState = %q, want still Reserved", got.Status.EscrowState) + } + if got.Status.Phase != bountyPhaseVerified { + t.Fatalf("phase = %q, want Verified (accepted, awaiting settlement voucher)", got.Status.Phase) + } + + // Once the facilitator stops refusing (voucher arrived on its side), the + // next reconcile captures. 
+ fake.mu.Lock() + delete(fake.captureErr, "uid-refuse") + fake.mu.Unlock() + reconcileBountyUntilSettled(t, c, key) + got = getBounty(t, c, ns, "refuse") + if got.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid after the refusal clears", got.Status.Phase) + } +} + +func TestBountyLifecycle_RefundVoidsEscalationBudget(t *testing.T) { + fake := newFakeEscrow() + sb := testEvalBounty("evict") + past := metav1.NewTime(time.Now().Add(time.Hour)) + sb.Spec.Deadline = &past + c := newBountyTestController(t, sb) + c.bountyEscrow = fake + stubEscalationPanel(t, r1Panel(7), nil) + ns := "hermes-obol-agent" + key := ns + "/evict" + + claimAndSubmit(t, c, ns, "evict") + commitAndReveal(t, c, ns, "evict", map[string]int64{evalA: 10, evalB: 45, evalC: 100}) + + got := getBounty(t, c, ns, "evict") + if got.Status.Escalation == nil || got.Status.Escalation.BudgetState != escrow.StateReserved { + t.Fatalf("escalation = %+v, want a funded escalation", got.Status.Escalation) + } + + // Deadline passes with the escalation still unresolved → refund returns + // every held leg, including the round-1 eval budget. 
+ expired := metav1.NewTime(time.Now().Add(-time.Minute)) + raw, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(ns).Get(context.Background(), "evict", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get bounty: %v", err) + } + if err := unstructured.SetNestedField(raw.Object, expired.UTC().Format(time.RFC3339), "spec", "deadline"); err != nil { + t.Fatalf("set deadline: %v", err) + } + if _, err := c.dynClient.Resource(monetizeapi.ServiceBountyGVR).Namespace(ns).Update(context.Background(), raw, metav1.UpdateOptions{}); err != nil { + t.Fatalf("update bounty: %v", err) + } + reconcileBountyUntilSettled(t, c, key) + + got = getBounty(t, c, ns, "evict") + if got.Status.Phase != bountyPhaseRefunded { + t.Fatalf("phase = %q, want Refunded", got.Status.Phase) + } + if got.Status.Escalation.BudgetState != escrow.StateVoided { + t.Fatalf("escalation budget = %q, want Voided on refund", got.Status.Escalation.BudgetState) + } + fake.mu.Lock() + state := fake.states["uid-evict-eval-r1"] + fake.mu.Unlock() + if state != escrow.StateVoided { + t.Fatalf("facilitator state for eval-r1 = %q, want Voided", state) + } +} diff --git a/internal/serviceoffercontroller/bounty_panel.go b/internal/serviceoffercontroller/bounty_panel.go new file mode 100644 index 00000000..6feda959 --- /dev/null +++ b/internal/serviceoffercontroller/bounty_panel.go @@ -0,0 +1,465 @@ +package serviceoffercontroller + +// Evaluator panel selection + ladder bookkeeping (design doc §11.4). +// +// Selection is controller-side weighted sampling — the honest local-first +// stand-in for VRF (the swap seam is exactly this function). It is +// DETERMINISTIC per bounty: seeded from the controller's seedSource (local: +// sha256(UID); drand: a beacon that does not exist yet at posting time) so +// every reconcile computes the same panel (idempotence), and the poster +// cannot re-roll evaluators by touching the spec. 
The seed's provenance is +// persisted into status.panelSeed so the draw is auditable. +// +// Seats: k counting seats (Full tier, plus at most ONE Probation seat on +// value-capped bounties — the median absorbs one outlier, which is what makes +// the newcomer seat verdict-safe) + up to two free Shadow seats, randomly +// ASSIGNED (a sybil can't choose where to warm reputation). If the enrolled +// pool can't fill k counting seats the bounty falls back to open-door (any +// address may evaluate), and ladder bookkeeping still applies to enrolled +// participants — open-door participation is how the first evaluators climb +// out of Shadow. +// +// Reputation is read through the decay lens (internal/bounty/decay.go): the +// lottery weight uses the half-life-decayed completion count, a stored Full +// tier reads as Probation once stale, and chain-grounded verdicts earn a +// weight bonus. Stored counters are never mutated by decay. + +import ( + "context" + "crypto/sha256" + "encoding/binary" + "fmt" + "log" + "math/rand" + "slices" + "sort" + "strconv" + "strings" + "time" + + "github.com/ethereum/go-ethereum/common" + + "github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + maxShadowSeats = 2 + recentFulfillersKept = 5 + // pairDiversityWeight down-weights an evaluator who recently judged the + // same fulfiller (anti-collusion: break up cozy evaluator↔fulfiller pairs). + pairDiversityWeight = 0.25 + + // escalationSeedSuffix derives the escalation-round seed from the round-0 + // seed: sha256(round0seed || suffix). Same beacon, distinct lottery. + escalationSeedSuffix = "escalation-r1" +) + +// evaluatorCandidate is one enrolled evaluator considered for selection. 
+type evaluatorCandidate struct { + Address string + Record monetizeapi.EvaluatorLadderRecord +} + +// panelSeedSource returns the controller's seed source, defaulting to the +// local deterministic seed when none was wired (tests construct Controller +// literals). +func (c *Controller) panelSeedSource() seedSource { + if c.seeds == nil { + return localSeedSource{} + } + return c.seeds +} + +// listEnrollmentsForTask returns the enrolled evaluators for a task type in +// the bounty's namespace. +func (c *Controller) listEnrollmentsForTask(ctx context.Context, namespace, taskRef string) ([]monetizeapi.EvaluatorEnrollment, error) { + raw, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + var out []monetizeapi.EvaluatorEnrollment + for i := range raw.Items { + var enrollment monetizeapi.EvaluatorEnrollment + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Items[i].Object, &enrollment); err != nil { + continue + } + if slices.Contains(enrollment.Spec.TaskTypes, taskRef) { + out = append(out, enrollment) + } + } + sort.Slice(out, func(i, j int) bool { return out[i].Spec.Address < out[j].Spec.Address }) + return out, nil +} + +// ladderRecordFor returns the enrollment's ladder record for taskRef; new +// enrollments start at Shadow. +func ladderRecordFor(enrollment *monetizeapi.EvaluatorEnrollment, taskRef string) monetizeapi.EvaluatorLadderRecord { + for _, r := range enrollment.Status.Records { + if r.TaskType == taskRef { + return r + } + } + return monetizeapi.EvaluatorLadderRecord{TaskType: taskRef, Tier: monetizeapi.EvaluatorTierShadow} +} + +// ladderForTask resolves the task package's ladder for taskRef; zero Ladder +// (with parse-time defaults applied by callees) when the type is unknown. 
+func ladderForTask(taskRef string) bounty.Ladder { + if t, err := bounty.Resolve(taskRef); err == nil { + return t.Eval.Ladder + } + return bounty.Ladder{} +} + +// ladderWeight is THE lottery weight: 1 + 0.1×(effectiveCompleted − +// divergences) floored at 0.1, where effectiveCompleted is the half-life- +// decayed completion count; ×0.25 pair-diversity penalty for a recently +// judged fulfiller; ×(1 + min(1, grounded/completed)) bonus for verdicts +// grounded by on-chain ERC-8004 validation entries. +func ladderWeight(record monetizeapi.EvaluatorLadderRecord, fulfiller string, halfLife time.Duration, now time.Time) float64 { + var lastEval *time.Time + if record.LastEvalAt != nil { + lastEval = &record.LastEvalAt.Time + } + effective := bounty.EffectiveCompleted(int(record.Completed), lastEval, now, halfLife) + w := 1.0 + 0.1*(effective-float64(record.Divergences)) + if w < 0.1 { + w = 0.1 + } + if fulfiller != "" && slices.Contains(record.RecentFulfillers, fulfiller) { + w *= pairDiversityWeight + } + denom := record.Completed + if denom < 1 { + denom = 1 + } + bonus := float64(record.GroundedEvals) / float64(denom) + if bonus > 1 { + bonus = 1 + } + return w * (1 + bonus) +} + +// rngFromSeed turns the 32-byte panel seed into the deterministic lottery RNG. +func rngFromSeed(seed [32]byte) *rand.Rand { + return rand.New(rand.NewSource(int64(binary.BigEndian.Uint64(seed[:8])))) //nolint:gosec // deterministic-by-design selection, not crypto +} + +// selectEvaluatorPanel performs the deterministic weighted sampling. Returns +// nil when the counting pool (Full+Probation, read through the decay lens) +// cannot fill k seats — the open-door fallback. 
+func selectEvaluatorPanel(seed [32]byte, pool []monetizeapi.EvaluatorEnrollment, taskRef string, k int64, rewardAmount string, ladder bounty.Ladder, fulfiller string, now time.Time) []monetizeapi.ServiceBountyPanelSeat { + halfLife := ladder.DecayHalfLifeDuration() + + var full, probation, shadow []evaluatorCandidate + for i := range pool { + candidate := evaluatorCandidate{ + Address: pool[i].Spec.Address, + Record: ladderRecordFor(&pool[i], taskRef), + } + // Tier gating goes through the decay lens: a stale Full reads as + // Probation here without mutating the stored record. + switch bounty.EffectiveTier(candidate.Record, ladder, now) { + case monetizeapi.EvaluatorTierFull: + full = append(full, candidate) + case monetizeapi.EvaluatorTierProbation: + probation = append(probation, candidate) + default: + shadow = append(shadow, candidate) + } + } + + counting := len(full) + len(probation) + if int64(counting) < k { + return nil // open-door fallback + } + + rng := rngFromSeed(seed) + weight := func(candidate evaluatorCandidate) float64 { + return ladderWeight(candidate.Record, fulfiller, halfLife, now) + } + + var seats []monetizeapi.ServiceBountyPanelSeat + + // One reserved probation seat on value-capped bounties: the median-of-k + // absorbs one outlier, so the newcomer seat is verdict-safe by + // construction — and only offered where the value cap allows. + remaining := k + if len(probation) > 0 && withinValueCap(rewardAmount, ladder.ProbationValueCap) && k >= 3 { + pick := weightedPick(rng, probation, weight) + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatProbation}) + probation = removeCandidate(probation, pick.Address) + remaining-- + } + + countingPool := append(append([]evaluatorCandidate{}, full...), probation...) 
+ for remaining > 0 && len(countingPool) > 0 { + pick := weightedPick(rng, countingPool, weight) + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatFull}) + countingPool = removeCandidate(countingPool, pick.Address) + remaining-- + } + if remaining > 0 { + return nil // pool shrank under us — open-door + } + + // Shadows are randomly ASSIGNED, never chosen by the evaluator. + for i := 0; i < maxShadowSeats && len(shadow) > 0; i++ { + pick := shadow[rng.Intn(len(shadow))] + seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatShadow}) + shadow = removeCandidate(shadow, pick.Address) + } + + sort.Slice(seats, func(i, j int) bool { return seats[i].Address < seats[j].Address }) + return seats +} + +func weightedPick(rng *rand.Rand, pool []evaluatorCandidate, weight func(evaluatorCandidate) float64) evaluatorCandidate { + total := 0.0 + for _, candidate := range pool { + total += weight(candidate) + } + target := rng.Float64() * total + for _, candidate := range pool { + target -= weight(candidate) + if target <= 0 { + return candidate + } + } + return pool[len(pool)-1] +} + +func removeCandidate(pool []evaluatorCandidate, address string) []evaluatorCandidate { + out := pool[:0] + for _, candidate := range pool { + if candidate.Address != address { + out = append(out, candidate) + } + } + return out +} + +func withinValueCap(amount, cap string) bool { + a, errA := strconv.ParseFloat(strings.TrimSpace(amount), 64) + c, errC := strconv.ParseFloat(strings.TrimSpace(cap), 64) + if errA != nil || errC != nil || c <= 0 { + return false + } + return a <= c +} + +// ensurePanel runs selection exactly once per bounty (latched by the +// PanelSelected condition so a growing pool can never re-gate a bounty whose +// evaluation already started). 
A seed-source failure (drand relay down or a +// beacon failing verification) does NOT latch: the panel stays unselected and +// the bounty is requeued — never a silent fallback to the local seed. +func (c *Controller) ensurePanel(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) { + for _, condition := range status.Conditions { + if condition.Type == "PanelSelected" { + return + } + } + + seed, provenance, err := c.panelSeedSource().Seed(ctx, string(sb.UID), sb.CreationTimestamp.Time) + if err != nil { + log.Printf("bounty %s/%s: panel seed unavailable, retrying in %s: %v", sb.Namespace, sb.Name, seedRetryDelay, err) + if c.bountyQueue != nil { + c.bountyQueue.AddAfter(sb.Namespace+"/"+sb.Name, seedRetryDelay) + } + return + } + status.PanelSeed = &provenance + + taskRef := sb.Spec.Task.TypeRef + pool, err := c.listEnrollmentsForTask(ctx, sb.Namespace, taskRef) + if err != nil { + // Missing CRD / transient list error → open-door, recorded as such. 
+ setPurchaseCondition(&status.Conditions, "PanelSelected", "False", "OpenDoor", + truncateMessage(fmt.Sprintf("enrollment pool unavailable (%v) — open-door evaluation", err))) + return + } + + k := sb.Spec.Eval.K + if k < 1 { + k = 1 + } + fulfiller := "" + if len(status.Claims) > 0 { + fulfiller = status.Claims[0].FulfillerAddress + } + + seats := selectEvaluatorPanel(seed, pool, taskRef, k, sb.Spec.Reward.Amount, ladderForTask(taskRef), fulfiller, time.Now()) + if seats == nil { + setPurchaseCondition(&status.Conditions, "PanelSelected", "False", "OpenDoor", + fmt.Sprintf("Enrolled pool has fewer than %d counting evaluators — open-door evaluation", k)) + return + } + status.EvaluatorPanel = seats + setPurchaseCondition(&status.Conditions, "PanelSelected", "True", "Selected", + fmt.Sprintf("%d counting seat(s) + %d shadow(s) selected from %d enrolled", k, len(seats)-int(k), len(pool))) +} + +// selectEscalationPanel draws the second-round panel for an escalated verdict: +// a FRESH, larger panel where every seat counts at full pay (no probation +// discount, no shadows — escalation is the tiebreaker, not the on-ramp), and +// every round-0 participant is excluded (keys of exclude are canonical EIP-55 +// addresses). The seed derives deterministically from the same round-0 seed +// ensurePanel used — sha256(round0seed || "escalation-r1") — recomputed via +// the seedSource (the provenance in status guarantees the same beacon), so +// repeated reconciles draw the same escalation panel. A pool smaller than +// size falls back to open-door (nil seats), same semantics as round 0. 
+func (c *Controller) selectEscalationPanel(ctx context.Context, sb *unstructured.Unstructured, size int, exclude map[string]bool) ([]monetizeapi.ServiceBountyPanelSeat, error) {
+	taskRef, _, _ := unstructured.NestedString(sb.Object, "spec", "task", "typeRef")
+	pool, err := c.listEnrollmentsForTask(ctx, sb.GetNamespace(), taskRef)
+	if err != nil {
+		return nil, fmt.Errorf("list enrollments for escalation panel: %w", err)
+	}
+
+	round0Seed, _, err := c.panelSeedSource().Seed(ctx, string(sb.GetUID()), sb.GetCreationTimestamp().Time)
+	if err != nil {
+		return nil, fmt.Errorf("derive escalation seed: %w", err)
+	}
+	seed := sha256.Sum256(append(round0Seed[:], []byte(escalationSeedSuffix)...))
+
+	ladder := ladderForTask(taskRef)
+	halfLife := ladder.DecayHalfLifeDuration()
+	// NOTE(review): tiers and weights are evaluated at wall-clock time, so
+	// two draws are only guaranteed identical while decay has not moved a
+	// candidate across a tier boundary or shifted the weights enough to
+	// change a pick — confirm callers persist the first result.
+	now := time.Now()
+
+	// The first claim's fulfiller drives the pair-diversity penalty.
+	fulfiller := ""
+	if claims, _, _ := unstructured.NestedSlice(sb.Object, "status", "claims"); len(claims) > 0 {
+		if claim, ok := claims[0].(map[string]any); ok {
+			fulfiller, _ = claim["fulfillerAddress"].(string)
+		}
+	}
+
+	var counting []evaluatorCandidate
+	for i := range pool {
+		if exclude[common.HexToAddress(pool[i].Spec.Address).Hex()] {
+			continue // round-0 participants never re-judge their own divergence
+		}
+		candidate := evaluatorCandidate{
+			Address: pool[i].Spec.Address,
+			Record:  ladderRecordFor(&pool[i], taskRef),
+		}
+		switch bounty.EffectiveTier(candidate.Record, ladder, now) {
+		case monetizeapi.EvaluatorTierFull, monetizeapi.EvaluatorTierProbation:
+			counting = append(counting, candidate)
+		}
+	}
+	if len(counting) < size {
+		return nil, nil // open-door fallback, same as round 0's thin pool
+	}
+
+	rng := rngFromSeed(seed)
+	weight := func(candidate evaluatorCandidate) float64 {
+		return ladderWeight(candidate.Record, fulfiller, halfLife, now)
+	}
+
+	var seats []monetizeapi.ServiceBountyPanelSeat
+	for len(seats) < size && len(counting) > 0 {
+		pick := weightedPick(rng, counting, weight)
+		seats = append(seats, monetizeapi.ServiceBountyPanelSeat{Address: pick.Address, Seat: monetizeapi.PanelSeatFull})
+		counting = removeCandidate(counting, pick.Address)
+	}
+	if len(seats) < size {
+		// removeCandidate drops EVERY enrollment sharing the picked address,
+		// so duplicates can drain the pool early. Round 0 treats that as
+		// open-door; a short escalation panel must do the same.
+		return nil, nil
+	}
+	sort.Slice(seats, func(i, j int) bool { return seats[i].Address < seats[j].Address })
+	return seats, nil
+}
+
+// recordLadder applies the one-shot cross-bounty bookkeeping after the quorum
+// settles. For every entry in status.Evaluations whose address matches an
+// EvaluatorEnrollment in the bounty's namespace, it updates that enrollment's
+// ladder record for the bounty's task type and writes it back through the
+// status subresource:
+//
+//   - Completed is incremented and LastEvalAt (the decay anchor) re-stamped;
+//   - GroundedEvals is incremented for grounded verdicts;
+//   - Divergences is incremented for out-of-band verdicts;
+//   - a Shadow record gains a ShadowAgreement per in-band verdict and moves
+//     to Probation at the ShadowAgreements threshold; a Probation record
+//     gains a ProbationEval per in-band verdict and moves to Full at the
+//     ProbationEvals threshold;
+//   - the fulfiller of the first claim is pushed onto the front of
+//     RecentFulfillers, which is capped at recentFulfillersKept entries.
+//
+// Thresholds come from the resolved task type's ladder when it sets
+// ShadowAgreements; otherwise 5 agreements / 10 probation evals apply.
+//
+// Enrollments are listed once per call. A List failure is logged and treated
+// as "nobody enrolled" (best-effort, nil error); an enrollment that fails to
+// decode is logged and skipped. The first failed status update aborts the
+// loop and is returned wrapped with the evaluator address.
+//
+// NOTE(review): the increments are not idempotent — if a caller retries after
+// a mid-loop update failure, enrollments already written are counted again.
+// Confirm how the caller latches this step.
+func (c *Controller) recordLadder(ctx context.Context, sb *monetizeapi.ServiceBounty, status *monetizeapi.ServiceBountyStatus) error {
+	if len(status.Evaluations) == 0 {
+		return nil
+	}
+
+	taskRef := sb.Spec.Task.TypeRef
+	thresholds := bounty.Ladder{ShadowAgreements: 5, ProbationEvals: 10}
+	if t, err := bounty.Resolve(taskRef); err == nil && t.Eval.Ladder.ShadowAgreements > 0 {
+		thresholds = t.Eval.Ladder
+	}
+	fulfiller := ""
+	if len(status.Claims) > 0 {
+		fulfiller = status.Claims[0].FulfillerAddress
+	}
+	now := metav1.Now()
+
+	enrollments := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(sb.Namespace)
+	list, err := enrollments.List(ctx, metav1.ListOptions{})
+	if err != nil {
+		log.Printf("bounty %s/%s: enrollment list unavailable, ladder not recorded: %v", sb.Namespace, sb.Name, err)
+		return nil
+	}
+
+	for _, evaluation := range status.Evaluations {
+		idx := enrollmentIndexByAddress(list.Items, evaluation.Address)
+		if idx < 0 {
+			continue // unenrolled open-door participant — nothing to record
+		}
+		raw := &list.Items[idx]
+
+		var enrollment monetizeapi.EvaluatorEnrollment
+		if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &enrollment); err != nil {
+			log.Printf("bounty %s/%s: enrollment %s undecodable, ladder entry skipped: %v", sb.Namespace, sb.Name, raw.GetName(), err)
+			continue
+		}
+
+		record := ladderRecordFor(&enrollment, taskRef)
+		record.Completed++
+		record.LastEvalAt = now.DeepCopy() // the decay anchor: every counted participation re-stamps it
+		if evaluation.Grounded {
+			record.GroundedEvals++
+		}
+		if !evaluation.WithinBand {
+			record.Divergences++
+		}
+		// Promotion is driven by the STORED tier, one rung at a time.
+		switch record.Tier {
+		case monetizeapi.EvaluatorTierShadow:
+			if evaluation.WithinBand {
+				record.ShadowAgreements++
+			}
+			if record.ShadowAgreements >= int64(thresholds.ShadowAgreements) {
+				record.Tier = monetizeapi.EvaluatorTierProbation
+			}
+		case monetizeapi.EvaluatorTierProbation:
+			if evaluation.WithinBand {
+				record.ProbationEvals++
+			}
+			if record.ProbationEvals >= int64(thresholds.ProbationEvals) {
+				record.Tier = monetizeapi.EvaluatorTierFull
+			}
+		}
+		if fulfiller != "" {
+			// Most recent first; the oldest entries fall off the end.
+			record.RecentFulfillers = append([]string{fulfiller}, record.RecentFulfillers...)
+			if len(record.RecentFulfillers) > recentFulfillersKept {
+				record.RecentFulfillers = record.RecentFulfillers[:recentFulfillersKept]
+			}
+		}
+
+		// Upsert the record for this task type.
+		replaced := false
+		for i := range enrollment.Status.Records {
+			if enrollment.Status.Records[i].TaskType == taskRef {
+				enrollment.Status.Records[i] = record
+				replaced = true
+			}
+		}
+		if !replaced {
+			enrollment.Status.Records = append(enrollment.Status.Records, record)
+		}
+
+		statusObject, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&enrollment.Status)
+		if err != nil {
+			return err
+		}
+		patched := raw.DeepCopy()
+		patched.Object["status"] = statusObject
+		updated, err := enrollments.UpdateStatus(ctx, patched, metav1.UpdateOptions{})
+		if err != nil {
+			return fmt.Errorf("update ladder for %s: %w", evaluation.Address, err)
+		}
+		if updated != nil {
+			// Keep the cached copy current so a later evaluation for the same
+			// enrollment builds on this write instead of a stale object.
+			list.Items[idx] = *updated
+		}
+	}
+	return nil
+}
+
+// findEnrollmentByAddress lists the EvaluatorEnrollments in namespace and
+// returns the first one whose spec.address equals address, compared
+// case-insensitively. It returns (nil, nil) when none matches and
+// (nil, err) when the List call fails.
+func (c *Controller) findEnrollmentByAddress(ctx context.Context, namespace, address string) (*unstructured.Unstructured, error) {
+	list, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return nil, err
+	}
+	if idx := enrollmentIndexByAddress(list.Items, address); idx >= 0 {
+		return &list.Items[idx], nil
+	}
+	return nil, nil
+}
+
+// enrollmentIndexByAddress returns the index of the first item whose
+// spec.address equals address case-insensitively, or -1 when none matches.
+func enrollmentIndexByAddress(items []unstructured.Unstructured, address string) int {
+	for i := range items {
+		if addr, _, _ := unstructured.NestedString(items[i].Object, "spec", "address"); strings.EqualFold(addr, address) {
+			return i
+		}
+	}
+	return -1
+}
diff --git a/internal/serviceoffercontroller/bounty_panel_test.go b/internal/serviceoffercontroller/bounty_panel_test.go
new file mode 100644
index 00000000..9cbe04b2
--- /dev/null
+++ b/internal/serviceoffercontroller/bounty_panel_test.go
@@ -0,0 +1,562 @@
+package serviceoffercontroller
+
+import (
+	"context"
+	"crypto/sha256"
+	"fmt"
+	"math"
+	"reflect"
+	"strings"
+	"testing"
+	"time"
+
+	
"github.com/ObolNetwork/obol-stack/internal/bounty" + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/util/workqueue" +) + +// testPanelLadder mirrors the benchmark@v1 ladder shape used across panel +// tests: probation cap 50.00, default decay knobs. +var testPanelLadder = bounty.Ladder{ + ShadowAgreements: 5, + ProbationEvals: 10, + ProbationValueCap: "50.00", + DecayHalfLife: "720h", +} + +func seedOf(uid string) [32]byte { return sha256.Sum256([]byte(uid)) } + +func testEnrollment(t *testing.T, name, address, tier string) *unstructured.Unstructured { + t.Helper() + enrollment := monetizeapi.EvaluatorEnrollment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.EvaluatorEnrollmentKind, + }, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "hermes-obol-agent"}, + Spec: monetizeapi.EvaluatorEnrollmentSpec{ + Address: address, + TaskTypes: []string{"benchmark@v1"}, + }, + } + if tier != "" { + enrollment.Status.Records = []monetizeapi.EvaluatorLadderRecord{{TaskType: "benchmark@v1", Tier: tier}} + } + obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&enrollment) + if err != nil { + t.Fatalf("enrollment to unstructured: %v", err) + } + return &unstructured.Unstructured{Object: obj} +} + +func newPanelTestController(t *testing.T, sb *monetizeapi.ServiceBounty, enrollments ...*unstructured.Unstructured) *Controller { + t.Helper() + objects := []runtime.Object{mustBountyObject(t, sb)} + for _, e := range enrollments { + objects = append(objects, e) + } + dynClient := fake.NewSimpleDynamicClientWithCustomListKinds( + runtime.NewScheme(), + map[schema.GroupVersionResource]string{ + 
monetizeapi.ServiceBountyGVR: "ServiceBountyList", + monetizeapi.EvaluatorEnrollmentGVR: "EvaluatorEnrollmentList", + }, + objects..., + ) + return &Controller{ + dynClient: dynClient, + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: escrow.NewLedgerGateway(), + } +} + +func TestSelectEvaluatorPanel_DeterministicPerBounty(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{} + for i := 0; i < 6; i++ { + addr := fmt.Sprintf("0x%040d", i) + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: addr, TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}, + }}, + }) + } + + now := time.Now() + a := selectEvaluatorPanel(seedOf("uid-1"), pool, "benchmark@v1", 3, "5.00", testPanelLadder, "0xf", now) + b := selectEvaluatorPanel(seedOf("uid-1"), pool, "benchmark@v1", 3, "5.00", testPanelLadder, "0xf", now) + if !reflect.DeepEqual(a, b) { + t.Fatalf("selection must be deterministic per bounty UID:\n%v\n%v", a, b) + } + if len(a) != 3 { + t.Fatalf("got %d seats, want 3", len(a)) + } +} + +func TestSelectEvaluatorPanel_OpenDoorWhenPoolThin(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{ + { + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("1", 40), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{{TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}}}, + }, + // Shadows are not counting candidates. 
+ {Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("2", 40), TaskTypes: []string{"benchmark@v1"}}}, + } + if seats := selectEvaluatorPanel(seedOf("uid"), pool, "benchmark@v1", 3, "5.00", testPanelLadder, "", time.Now()); seats != nil { + t.Fatalf("thin pool must fall back to open-door, got %v", seats) + } +} + +func TestSelectEvaluatorPanel_ProbationSeatValueCapped(t *testing.T) { + pool := []monetizeapi.EvaluatorEnrollment{} + for i := 0; i < 4; i++ { + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: fmt.Sprintf("0x%040d", i), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull}, + }}, + }) + } + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: "0x" + strings.Repeat("9", 40), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierProbation}, + }}, + }) + + countProbation := func(seats []monetizeapi.ServiceBountyPanelSeat) int { + n := 0 + for _, s := range seats { + if s.Seat == monetizeapi.PanelSeatProbation { + n++ + } + } + return n + } + + under := selectEvaluatorPanel(seedOf("uid"), pool, "benchmark@v1", 3, "5.00", testPanelLadder, "", time.Now()) + if countProbation(under) != 1 { + t.Errorf("reward under the cap must seat exactly one probationer, got %d (%v)", countProbation(under), under) + } + over := selectEvaluatorPanel(seedOf("uid"), pool, "benchmark@v1", 3, "500.00", testPanelLadder, "", time.Now()) + if countProbation(over) != 0 { + t.Errorf("reward above the cap must seat no probationer, got %d (%v)", countProbation(over), over) + } +} + +// Full panel-mode lifecycle: panel gates out a non-panel commit, the shadow is +// graded but not 
counted, evaluators get paid, the ladder records. +func TestEvalMarket_PanelMode(t *testing.T) { + sb := testEvalBounty("panel") + sb.Spec.Trust.SelfBond = monetizeapi.ServiceBountySelfBond{} + pool := []*unstructured.Unstructured{ + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-b", evalB, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-c", evalC, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-shadow", "0xdddddddddddddddddddddddddddddddddddddddd", ""), + } + c := newPanelTestController(t, sb, pool...) + ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "panel") + + got := getBounty(t, c, ns, "panel") + if len(got.Status.EvaluatorPanel) != 4 { + t.Fatalf("panel = %v, want 3 counting + 1 shadow", got.Status.EvaluatorPanel) + } + seatOf := map[string]string{} + for _, seat := range got.Status.EvaluatorPanel { + seatOf[strings.ToLower(seat.Address)] = seat.Seat + } + if seatOf["0xdddddddddddddddddddddddddddddddddddddddd"] != monetizeapi.PanelSeatShadow { + t.Fatalf("the Shadow-tier enrollee must hold the shadow seat: %v", seatOf) + } + if got.Status.EvalBudgetState != escrow.StateReserved { + t.Fatalf("eval budget state = %q, want Reserved at panel selection", got.Status.EvalBudgetState) + } + + // A non-panel outsider tries to commit — must be ignored. + outsider := "0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-commit-" + outsider: monetizeapi.EvalCommitHash(99, "x", outsider), + }) + + // Panel members (incl. the shadow) commit and reveal. 
+ scores := map[string]int64{evalA: 90, evalB: 85, evalC: 80, "0xdddddddddddddddddddddddddddddddddddddddd": 10} + for addr, score := range scores { + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/panel") + for addr, score := range scores { + annotateBounty(t, c, ns, "panel", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/panel") + + got = getBounty(t, c, ns, "panel") + for _, ev := range got.Status.Evaluations { + if strings.EqualFold(ev.Address, outsider) { + t.Error("non-panel commit must be ignored in panel mode") + } + } + if got.Status.WeightedScore != 85 { + t.Errorf("WeightedScore = %d, want 85 (shadow's 10 must not move the median)", got.Status.WeightedScore) + } + if got.Status.Phase != bountyPhasePaid { + t.Fatalf("phase = %q, want Paid", got.Status.Phase) + } + if got.Status.EvalBudgetState != escrow.StateCaptured || got.Status.EvalPayoutTxHash == "" { + t.Errorf("eval budget = %q payout=%q, want Captured with a batch receipt", got.Status.EvalBudgetState, got.Status.EvalPayoutTxHash) + } + for _, ev := range got.Status.Evaluations { + isShadow := ev.Seat == monetizeapi.PanelSeatShadow + if ev.Paid == isShadow { + t.Errorf("evaluator %s (seat=%s) paid=%v — counting seats are paid, shadows are free", ev.Address, ev.Seat, ev.Paid) + } + } + if !got.Status.LadderRecorded { + t.Fatal("ladder bookkeeping must latch after settle") + } + + // Ladder: the shadow diverged (10 vs median 85, out of band) → no + // agreement; counting members completed in band. 
+ shadowRecord := ladderStatusOf(t, c, ns, "ev-shadow") + if shadowRecord.ShadowAgreements != 0 || shadowRecord.Completed != 1 || shadowRecord.Divergences != 1 { + t.Errorf("shadow record = %+v, want completed=1 divergences=1 agreements=0", shadowRecord) + } + fullRecord := ladderStatusOf(t, c, ns, "ev-a") + if fullRecord.Completed != 1 || fullRecord.Divergences != 0 { + t.Errorf("full record = %+v, want completed=1 divergences=0", fullRecord) + } + if len(fullRecord.RecentFulfillers) == 0 { + t.Error("pair-diversity history must record the fulfiller") + } +} + +// A shadow agreeing with the median climbs toward Probation. +func TestEvalMarket_ShadowAgreementClimbs(t *testing.T) { + sb := testEvalBounty("shadow-climb") + pool := []*unstructured.Unstructured{ + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-b", evalB, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-c", evalC, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-shadow", "0xdddddddddddddddddddddddddddddddddddddddd", ""), + } + c := newPanelTestController(t, sb, pool...) 
+ ns := "hermes-obol-agent" + + claimAndSubmit(t, c, ns, "shadow-climb") + scores := map[string]int64{evalA: 90, evalB: 85, evalC: 80, "0xdddddddddddddddddddddddddddddddddddddddd": 88} + for addr, score := range scores { + annotateBounty(t, c, ns, "shadow-climb", map[string]string{ + "obol.org/eval-commit-" + addr: monetizeapi.EvalCommitHash(score, "salt-"+addr, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/shadow-climb") + for addr, score := range scores { + annotateBounty(t, c, ns, "shadow-climb", map[string]string{ + "obol.org/eval-reveal-" + addr: fmt.Sprintf(`{"score":%d,"salt":"salt-%s"}`, score, addr), + }) + } + reconcileBountyUntilSettled(t, c, ns+"/shadow-climb") + + record := ladderStatusOf(t, c, ns, "ev-shadow") + if record.ShadowAgreements != 1 { + t.Errorf("shadow within band must earn an agreement, got %+v", record) + } + if record.Tier != monetizeapi.EvaluatorTierShadow { + t.Errorf("one agreement must not yet promote (threshold 5), got tier %s", record.Tier) + } +} + +// The probation seat is half price and the discount goes to the POSTER: the +// reserved budget shrinks by per/2 when a probationer is seated. 
+func TestEvalBudgetTotal_ProbationDiscount(t *testing.T) { + sb := testEvalBounty("x") + sb.Spec.Eval.Payment.PerEvaluator = "2.00" + sb.Spec.Eval.K = 3 + + status := &monetizeapi.ServiceBountyStatus{} + if got := evalBudgetTotal(sb, status); got != "6.00" { + t.Errorf("all-full budget = %q, want 6.00", got) + } + + status.EvaluatorPanel = []monetizeapi.ServiceBountyPanelSeat{ + {Address: evalA, Seat: monetizeapi.PanelSeatFull}, + {Address: evalB, Seat: monetizeapi.PanelSeatFull}, + {Address: evalC, Seat: monetizeapi.PanelSeatProbation}, + } + if got := evalBudgetTotal(sb, status); got != "5.00" { + t.Errorf("probation-seated budget = %q, want 5.00 (2+2+1)", got) + } +} + +func TestLedgerGateway_CaptureBatch(t *testing.T) { + g := escrow.NewLedgerGateway() + if _, err := g.Reserve(context.Background(), escrow.ReserveRequest{ID: "b-eval", Asset: "OBOL", Amount: "6.00"}); err != nil { + t.Fatal(err) + } + receipt, err := g.CaptureBatch(context.Background(), "b-eval", []escrow.BatchRecipient{ + {Address: evalA, Amount: "2.00"}, {Address: evalB, Amount: "2.00"}, + }) + if err != nil { + t.Fatal(err) + } + if receipt.State != escrow.StateCaptured || !strings.Contains(receipt.TxHash, "batch[2]") { + t.Errorf("receipt = %+v, want Captured dev-ledger batch[2]", receipt) + } +} + +func ladderStatusOf(t *testing.T, c *Controller, namespace, name string) monetizeapi.EvaluatorLadderRecord { + t.Helper() + raw, err := c.dynClient.Resource(monetizeapi.EvaluatorEnrollmentGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("get enrollment %s: %v", name, err) + } + var enrollment monetizeapi.EvaluatorEnrollment + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(raw.Object, &enrollment); err != nil { + t.Fatalf("decode enrollment: %v", err) + } + if len(enrollment.Status.Records) == 0 { + return monetizeapi.EvaluatorLadderRecord{} + } + return enrollment.Status.Records[0] +} + +// ── decay-aware weighting 
─────────────────────────────────────────────────── + +func TestLadderWeight_DecayAfterHalfLifeIdle(t *testing.T) { + now := time.Date(2026, 6, 10, 0, 0, 0, 0, time.UTC) + halfLife := 720 * time.Hour + fresh := monetizeapi.EvaluatorLadderRecord{ + Completed: 10, + LastEvalAt: &metav1.Time{Time: now}, + } + if got := ladderWeight(fresh, "", halfLife, now); math.Abs(got-2.0) > 1e-9 { + t.Fatalf("fresh weight = %v, want 2.0 (1 + 0.1×10)", got) + } + stale := monetizeapi.EvaluatorLadderRecord{ + Completed: 10, + LastEvalAt: &metav1.Time{Time: now.Add(-halfLife)}, + } + if got := ladderWeight(stale, "", halfLife, now); math.Abs(got-1.5) > 1e-9 { + t.Fatalf("one-half-life-idle weight = %v, want 1.5 (effective completed halves to 5)", got) + } + legacy := monetizeapi.EvaluatorLadderRecord{Completed: 10} // nil LastEvalAt → no decay + if got := ladderWeight(legacy, "", halfLife, now); math.Abs(got-2.0) > 1e-9 { + t.Fatalf("legacy record weight = %v, want undecayed 2.0", got) + } +} + +func TestLadderWeight_GroundedBonus(t *testing.T) { + now := time.Now() + halfLife := 720 * time.Hour + allGrounded := monetizeapi.EvaluatorLadderRecord{ + Completed: 4, + GroundedEvals: 4, + LastEvalAt: &metav1.Time{Time: now}, + } + if got := ladderWeight(allGrounded, "", halfLife, now); math.Abs(got-2.8) > 1e-9 { + t.Fatalf("fully grounded weight = %v, want 2.8 (1.4 × 2)", got) + } + halfGrounded := monetizeapi.EvaluatorLadderRecord{ + Completed: 4, + GroundedEvals: 2, + LastEvalAt: &metav1.Time{Time: now}, + } + if got := ladderWeight(halfGrounded, "", halfLife, now); math.Abs(got-2.1) > 1e-9 { + t.Fatalf("half grounded weight = %v, want 2.1 (1.4 × 1.5)", got) + } + // The bonus is capped at ×2 even if counters drift (grounded > completed). 
+ overGrounded := monetizeapi.EvaluatorLadderRecord{ + Completed: 1, + GroundedEvals: 5, + LastEvalAt: &metav1.Time{Time: now}, + } + if got := ladderWeight(overGrounded, "", halfLife, now); math.Abs(got-2.2) > 1e-9 { + t.Fatalf("over-grounded weight = %v, want capped 2.2 (1.1 × 2)", got) + } +} + +// A stored Full whose reputation decayed below the probation threshold reads +// as Probation at selection time: it takes the reserved probation seat, never +// a full one. +func TestSelectEvaluatorPanel_StaleFullReadsAsProbation(t *testing.T) { + now := time.Date(2026, 6, 10, 0, 0, 0, 0, time.UTC) + staleAddr := "0x" + strings.Repeat("a", 40) + pool := []monetizeapi.EvaluatorEnrollment{} + for i := 0; i < 3; i++ { + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: fmt.Sprintf("0x%040d", i), TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + {TaskType: "benchmark@v1", Tier: monetizeapi.EvaluatorTierFull, LastEvalAt: &metav1.Time{Time: now}}, + }}, + }) + } + pool = append(pool, monetizeapi.EvaluatorEnrollment{ + Spec: monetizeapi.EvaluatorEnrollmentSpec{Address: staleAddr, TaskTypes: []string{"benchmark@v1"}}, + Status: monetizeapi.EvaluatorEnrollmentStatus{Records: []monetizeapi.EvaluatorLadderRecord{ + { + TaskType: "benchmark@v1", + Tier: monetizeapi.EvaluatorTierFull, + Completed: 10, // effective ≈ 0.01 after 10 half-lives — under ProbationEvals 10 + LastEvalAt: &metav1.Time{Time: now.Add(-10 * 720 * time.Hour)}, + }, + }}, + }) + + seats := selectEvaluatorPanel(seedOf("uid"), pool, "benchmark@v1", 3, "5.00", testPanelLadder, "", now) + if seats == nil { + t.Fatal("3 fresh Full + 1 demoted probationer must still fill k=3") + } + for _, seat := range seats { + if strings.EqualFold(seat.Address, staleAddr) && seat.Seat != monetizeapi.PanelSeatProbation { + t.Fatalf("stale Full must hold the probation seat, got %s", seat.Seat) + } 
+ if !strings.EqualFold(seat.Address, staleAddr) && seat.Seat == monetizeapi.PanelSeatProbation { + t.Fatalf("fresh Full %s must not hold the probation seat", seat.Address) + } + } +} + +// ── escalation panel ──────────────────────────────────────────────────────── + +func testEscalationBounty(name string) *monetizeapi.ServiceBounty { + return &monetizeapi.ServiceBounty{ + TypeMeta: metav1.TypeMeta{ + APIVersion: monetizeapi.Group + "/" + monetizeapi.Version, + Kind: monetizeapi.ServiceBountyKind, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "hermes-obol-agent", + UID: "esc-uid-1", + CreationTimestamp: metav1.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: monetizeapi.ServiceBountySpec{ + Task: monetizeapi.ServiceBountyTask{TypeRef: "benchmark@v1"}, + Reward: monetizeapi.ServiceBountyReward{Amount: "5.00"}, + Eval: monetizeapi.ServiceBountyEval{K: 3}, + }, + } +} + +func TestSelectEscalationPanel_DeterministicAndExcludesRound0(t *testing.T) { + sb := testEscalationBounty("esc") + var enrollments []*unstructured.Unstructured + var addrs []string + for i := 0; i < 8; i++ { + addr := fmt.Sprintf("0x%040d", i) + addrs = append(addrs, addr) + enrollments = append(enrollments, testEnrollment(t, fmt.Sprintf("ev-%d", i), addr, monetizeapi.EvaluatorTierFull)) + } + c := newPanelTestController(t, sb, enrollments...) + sbObj := mustBountyObject(t, sb) + + // Round-0 panel members are excluded by canonical EIP-55 address. 
+ exclude := map[string]bool{ + canonicalAddress(addrs[0]): true, + canonicalAddress(addrs[1]): true, + } + + first, err := c.selectEscalationPanel(context.Background(), sbObj, 5, exclude) + if err != nil { + t.Fatalf("selectEscalationPanel: %v", err) + } + second, err := c.selectEscalationPanel(context.Background(), sbObj, 5, exclude) + if err != nil { + t.Fatalf("selectEscalationPanel (second draw): %v", err) + } + if !reflect.DeepEqual(first, second) { + t.Fatalf("escalation panel must be deterministic:\n%v\n%v", first, second) + } + if len(first) != 5 { + t.Fatalf("got %d escalation seats, want 5", len(first)) + } + for _, seat := range first { + if seat.Seat != monetizeapi.PanelSeatFull { + t.Errorf("escalation seat %s = %q, want all counting/full-pay", seat.Address, seat.Seat) + } + if exclude[canonicalAddress(seat.Address)] { + t.Errorf("round-0 evaluator %s must be excluded from the escalation panel", seat.Address) + } + } +} + +func TestSelectEscalationPanel_OpenDoorWhenPoolThin(t *testing.T) { + sb := testEscalationBounty("esc-thin") + enrollments := []*unstructured.Unstructured{ + testEnrollment(t, "ev-0", "0x"+strings.Repeat("1", 40), monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-1", "0x"+strings.Repeat("2", 40), monetizeapi.EvaluatorTierFull), + } + c := newPanelTestController(t, sb, enrollments...) 
+ sbObj := mustBountyObject(t, sb) + exclude := map[string]bool{canonicalAddress("0x" + strings.Repeat("1", 40)): true} + + seats, err := c.selectEscalationPanel(context.Background(), sbObj, 5, exclude) + if err != nil { + t.Fatalf("selectEscalationPanel: %v", err) + } + if seats != nil { + t.Fatalf("thin escalation pool must fall back to open-door (nil seats), got %v", seats) + } +} + +// ── seed provenance in ensurePanel ────────────────────────────────────────── + +func TestEnsurePanel_PersistsLocalSeedProvenance(t *testing.T) { + sb := testEscalationBounty("seeded") + c := newPanelTestController(t, sb, + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-b", evalB, monetizeapi.EvaluatorTierFull), + testEnrollment(t, "ev-c", evalC, monetizeapi.EvaluatorTierFull), + ) + status := &monetizeapi.ServiceBountyStatus{} + c.ensurePanel(context.Background(), sb, status) + + if status.PanelSeed == nil || status.PanelSeed.Source != "local" { + t.Fatalf("status.panelSeed = %+v, want Source=local", status.PanelSeed) + } + if status.PanelSeed.Round != 0 || status.PanelSeed.Randomness != "" || status.PanelSeed.Signature != "" { + t.Fatalf("local provenance must carry no beacon fields, got %+v", status.PanelSeed) + } + if len(status.EvaluatorPanel) == 0 { + t.Fatal("panel must be selected from the enrolled Full pool") + } +} + +func TestEnsurePanel_SeedErrorDoesNotLatch(t *testing.T) { + sb := testEscalationBounty("seed-err") + c := newPanelTestController(t, sb, + testEnrollment(t, "ev-a", evalA, monetizeapi.EvaluatorTierFull), + ) + failing := &failingSeedSource{} + c.seeds = failing + + status := &monetizeapi.ServiceBountyStatus{} + c.ensurePanel(context.Background(), sb, status) + + if status.PanelSeed != nil || status.EvaluatorPanel != nil { + t.Fatalf("seed failure must leave the panel unselected, got seed=%+v panel=%v", status.PanelSeed, status.EvaluatorPanel) + } + for _, condition := range status.Conditions { + if condition.Type == 
"PanelSelected" { + t.Fatal("seed failure must NOT latch PanelSelected — the next reconcile retries the beacon") + } + } + + // Not latched: the next reconcile consults the seed source again. + c.ensurePanel(context.Background(), sb, status) + if failing.calls != 2 { + t.Fatalf("seed source consulted %d times, want 2 (retry, no latch)", failing.calls) + } +} diff --git a/internal/serviceoffercontroller/bounty_structure_test.go b/internal/serviceoffercontroller/bounty_structure_test.go new file mode 100644 index 00000000..6f4b329c --- /dev/null +++ b/internal/serviceoffercontroller/bounty_structure_test.go @@ -0,0 +1,36 @@ +package serviceoffercontroller + +import ( + "os" + "regexp" + "testing" +) + +// TestBountyReconcile_NeverCreatesIngressOrSecrets pins the review invariant +// that a ServiceBounty must never become public ingress and the bounty pass +// must never broker credentials: the reconcile source must not touch +// HTTPRoute, Middleware, ReferenceGrant, or Secret resources. (The structural +// source-check style follows internal/x402/setup_structure_test.go.) The scan +// covers every file the bounty reconcile spans — escrow, eval market, panel +// selection, escalation, grounding, and seed sourcing all carry the same +// invariant. 
+func TestBountyReconcile_NeverCreatesIngressOrSecrets(t *testing.T) { + files := []string{ + "bounty.go", + "bounty_eval.go", + "bounty_panel.go", + "bounty_escalation.go", + "bounty_grounding.go", + "seed.go", + } + forbidden := regexp.MustCompile(`HTTPRouteGVR|MiddlewareGVR|ReferenceGrantGVR|SecretGVR|c\.httpRoutes|c\.middlewares|c\.referenceGrants`) + for _, file := range files { + src, err := os.ReadFile(file) + if err != nil { + t.Fatalf("read %s: %v", file, err) + } + if match := forbidden.Find(src); match != nil { + t.Fatalf("%s references %q — the bounty reconcile must never create routes, middlewares, reference grants, or secrets (a bounty must never become ingress)", file, match) + } + } +} diff --git a/internal/serviceoffercontroller/controller.go b/internal/serviceoffercontroller/controller.go index be6b7cfe..4347c150 100644 --- a/internal/serviceoffercontroller/controller.go +++ b/internal/serviceoffercontroller/controller.go @@ -16,6 +16,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/erc8004" "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/x402/escrow" "github.com/ethereum/go-ethereum/common" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -67,14 +68,26 @@ type Controller struct { identityInformer cache.SharedIndexInformer purchaseInformer cache.SharedIndexInformer agentInformer cache.SharedIndexInformer + bountyInformer cache.SharedIndexInformer configMapInformer cache.SharedIndexInformer offerQueue workqueue.TypedRateLimitingInterface[string] registrationQueue workqueue.TypedRateLimitingInterface[string] identityQueue workqueue.TypedRateLimitingInterface[string] purchaseQueue workqueue.TypedRateLimitingInterface[string] agentQueue workqueue.TypedRateLimitingInterface[string] + bountyQueue workqueue.TypedRateLimitingInterface[string] catalogMu sync.Mutex + // bountyEscrow is the Hold/Release/Refund seam for ServiceBounty rewards. 
+ // Configured at construction (env), never from a bounty's spec — see + // newBountyEscrowGateway for why. + bountyEscrow escrow.Gateway + + // seeds produces the evaluator panel-lottery seed (local sha256(UID) or + // drand quicknet, selected by OBOL_BOUNTY_SEED at construction — never + // from a bounty's spec). Nil falls back to the local source (tests). + seeds seedSource + pendingAuths sync.Map // key: "ns/name" → []map[string]string httpClient *http.Client @@ -107,6 +120,27 @@ func New(cfg *rest.Config) (*Controller, error) { identityInformer := factory.ForResource(monetizeapi.AgentIdentityGVR).Informer() purchaseInformer := factory.ForResource(monetizeapi.PurchaseRequestGVR).Informer() agentInformer := factory.ForResource(monetizeapi.AgentGVR).Informer() + + // ServiceBounty is newer than the other CRDs. Guard on discovery so a + // controller image rolled onto a cluster that hasn't applied the CRD yet + // degrades to a log line instead of blocking every informer cache sync. + // Only a definitive "group served, resource absent" answer disables the + // pass — a transient discovery error keeps it on (the CRD ships in the + // same release train). 
+ var bountyInformer cache.SharedIndexInformer + if resources, err := kubeClient.Discovery().ServerResourcesForGroupVersion(monetizeapi.Group + "/" + monetizeapi.Version); err == nil { + for _, r := range resources.APIResources { + if r.Name == monetizeapi.ServiceBountyResource { + bountyInformer = factory.ForResource(monetizeapi.ServiceBountyGVR).Informer() + break + } + } + if bountyInformer == nil { + log.Printf("serviceoffer-controller: ServiceBounty CRD not installed; bounty reconcile disabled") + } + } else { + bountyInformer = factory.ForResource(monetizeapi.ServiceBountyGVR).Informer() + } configMapFactory := dynamicinformer.NewFilteredDynamicSharedInformerFactory(client, 0, "obol-frontend", func(options *metav1.ListOptions) { options.FieldSelector = fields.OneTermEqualSelector("metadata.name", "obol-stack-config").String() }) @@ -131,12 +165,16 @@ func New(cfg *rest.Config) (*Controller, error) { identityInformer: identityInformer, purchaseInformer: purchaseInformer, agentInformer: agentInformer, + bountyInformer: bountyInformer, configMapInformer: configMapInformer, offerQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), registrationQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), identityQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), purchaseQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), agentQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyQueue: workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]()), + bountyEscrow: newBountyEscrowGateway(), + seeds: newSeedSource(), httpClient: &http.Client{Timeout: 3 * time.Second}, registrationRPCBase: getenvDefault("ERC8004_RPC_BASE", erc8004.DefaultRPCBase), baseURLOverride: strings.TrimRight(os.Getenv("AGENT_BASE_URL"), 
"/"), @@ -201,6 +239,13 @@ func New(cfg *rest.Config) (*Controller, error) { UpdateFunc: func(_, newObj any) { controller.enqueueDiscoveryRefresh(newObj) }, DeleteFunc: controller.enqueueDiscoveryRefresh, }) + if bountyInformer != nil { + bountyInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: controller.enqueueBounty, + UpdateFunc: func(_, newObj any) { controller.enqueueBounty(newObj) }, + DeleteFunc: controller.enqueueBounty, + }) + } return controller, nil } @@ -211,6 +256,7 @@ func (c *Controller) Run(ctx context.Context, workers int) error { defer c.identityQueue.ShutDown() defer c.purchaseQueue.ShutDown() defer c.agentQueue.ShutDown() + defer c.bountyQueue.ShutDown() go c.offerInformer.Run(ctx.Done()) go c.registrationInformer.Run(ctx.Done()) @@ -218,14 +264,19 @@ func (c *Controller) Run(ctx context.Context, workers int) error { go c.purchaseInformer.Run(ctx.Done()) go c.agentInformer.Run(ctx.Done()) go c.configMapInformer.Run(ctx.Done()) - if !cache.WaitForCacheSync(ctx.Done(), + syncs := []cache.InformerSynced{ c.offerInformer.HasSynced, c.registrationInformer.HasSynced, c.identityInformer.HasSynced, c.purchaseInformer.HasSynced, c.agentInformer.HasSynced, c.configMapInformer.HasSynced, - ) { + } + if c.bountyInformer != nil { + go c.bountyInformer.Run(ctx.Done()) + syncs = append(syncs, c.bountyInformer.HasSynced) + } + if !cache.WaitForCacheSync(ctx.Done(), syncs...) 
{ return fmt.Errorf("wait for informer sync") } @@ -257,6 +308,12 @@ func (c *Controller) Run(ctx context.Context, workers int) error { for c.processNextAgent(ctx) { } }() + if c.bountyInformer != nil { + go func() { + for c.processNextBounty(ctx) { + } + }() + } } <-ctx.Done() @@ -455,6 +512,42 @@ func (c *Controller) reconcileOffer(ctx context.Context, key string) error { } } + if offer.IsSkill() { + ok, skillErr := c.reconcileSkillBundle(ctx, &status, offer) + if skillErr != nil { + return skillErr + } + if !ok { + // reconcileSkillBundle already set UpstreamHealthy=False with a + // specific reason (BundleMissing / BundleTooLarge / + // BundleHashMismatch / InvalidSkillUpstream / ...). Mirror the + // WaitingForAgent early return: park the downstream gates, commit + // status, refresh the catalog, and poll — no informer watches the + // operator's bundle ConfigMap, so a later kubectl apply of the + // bundle would otherwise never re-enqueue this offer. + setCondition(&status, "ModelReady", "True", "Skipped", "Skill offer does not require model preparation") + if offer.DrainExpired(time.Now()) { + if err := c.deleteRouteChildren(ctx, offer); err != nil { + return err + } + setCondition(&status, "Draining", "False", "Drained", fmt.Sprintf("Drain ended at %s; route torn down", offer.DrainEndsAt().UTC().Format(time.RFC3339))) + setCondition(&status, "PaymentGateReady", "False", "Drained", "Offer drained; payment gate removed") + setCondition(&status, "RoutePublished", "False", "Drained", "Offer drained; route removed") + } else { + setCondition(&status, "PaymentGateReady", "False", "WaitingForUpstream", "Waiting for a valid skill bundle before publishing payment gate") + setCondition(&status, "RoutePublished", "False", "WaitingForPaymentGate", "Waiting for payment gate before publishing route") + } + setCondition(&status, "Ready", "False", "Reconciling", "Offer is not fully reconciled yet") + if err := c.updateOfferStatus(ctx, raw, status); err != nil { + return 
err + } + c.offerQueue.AddAfter(offer.Namespace+"/"+offer.Name, 5*time.Second) + freshOffer := *offer + freshOffer.Status = status + return c.reconcileSkillCatalog(ctx, &freshOffer) + } + } + if err := c.reconcileModel(&status, offer); err != nil { return err } diff --git a/internal/serviceoffercontroller/seed.go b/internal/serviceoffercontroller/seed.go new file mode 100644 index 00000000..dadd547f --- /dev/null +++ b/internal/serviceoffercontroller/seed.go @@ -0,0 +1,234 @@ +package serviceoffercontroller + +// Panel-draw randomness sources (design doc §11.4). The evaluator panel is a +// weighted lottery; whoever controls the lottery seed controls the panel, so +// the seed's provenance is recorded in status.panelSeed for auditability. +// +// - local: sha256(bounty UID) — deterministic, free, fine for local-first +// single-operator stacks (exactly the historical behavior). +// - drand: the quicknet beacon FIRST round strictly after the bounty's +// creation +30s, fetched over public HTTP relays and BLS-verified against +// the quicknet group key. The poster cannot know the randomness when the +// bounty is created, and the operator cannot grind it: a fetch or verify +// failure returns an error and the panel stays unselected (requeue) — it +// NEVER silently falls back to the local seed, because "break the relay, +// get the predictable seed" would hand the operator a grinding lever. +// +// Mode is selected once at controller construction from OBOL_BOUNTY_SEED +// ("drand" → drand, anything else → local); relays are overridable via +// OBOL_BOUNTY_DRAND_URLS (comma-separated). 
+ +import ( + "context" + "crypto/sha256" + "encoding/binary" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "net/http" + "os" + "strings" + "time" + + bls12381 "github.com/drand/kyber-bls12381" + "github.com/drand/kyber/sign/bdn" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" +) + +// seedSource produces the 32-byte panel-lottery seed for a bounty plus the +// provenance record persisted into status.panelSeed. +type seedSource interface { + Seed(ctx context.Context, uid string, created time.Time) ([32]byte, monetizeapi.ServiceBountyPanelSeed, error) +} + +const ( + seedModeEnv = "OBOL_BOUNTY_SEED" + drandRelaysEnv = "OBOL_BOUNTY_DRAND_URLS" + seedSourceLocal = "local" + seedSourceDrand = "drand" + // seedRetryDelay is how long ensurePanel waits before re-trying a bounty + // whose beacon fetch/verify failed. + seedRetryDelay = 15 * time.Second +) + +// newSeedSource picks the seed source from the environment. Called once at +// controller construction. +func newSeedSource() seedSource { + if os.Getenv(seedModeEnv) == seedSourceDrand { + return newDrandSeedSource(nil) + } + return localSeedSource{} +} + +// localSeedSource is the historical deterministic seed: sha256(bounty UID). +type localSeedSource struct{} + +func (localSeedSource) Seed(_ context.Context, uid string, _ time.Time) ([32]byte, monetizeapi.ServiceBountyPanelSeed, error) { + return sha256.Sum256([]byte(uid)), monetizeapi.ServiceBountyPanelSeed{Source: seedSourceLocal}, nil +} + +// ── drand quicknet ────────────────────────────────────────────────────────── +// +// Chain parameters verified live against https://api.drand.sh/v2/beacons/quicknet/info +// (2026-06-10): scheme bls-unchained-g1-rfc9380 — signatures on G1, group +// public key on G2, signed message = sha256(8-byte big-endian round number) +// (drand/drand crypto/schemes.go, "unchained means we're only hashing the +// round number"). randomness = sha256(signature). 
+// +// Relay paths: api.drand.sh serves both /v2/beacons/quicknet/rounds/<round> and +// the chain-hash path /<chain-hash>/public/<round>; drand.cloudflare.com serves +// ONLY the chain-hash path (v2 404s, verified live). The chain-hash path is +// therefore what we fetch — it works on every default relay and pins the +// chain hash into the URL itself. +const ( + quicknetChainHash = "52db9ba70e0cc0f6eaf7803dd07447a1f5477735fd3f661792ba94600c84e971" + quicknetGenesisUnix = int64(1692803367) + quicknetPeriodSec = int64(3) + quicknetPublicKeyHex = "83cf0f2896adee7eb8b5f01fcad3912212c437e0073e911fb90022d3e760183c8c4b450b6a0a6c3ac6a5776a2d1064510d1fec758c921cc22b0e17e63aaf4bcb5ed66304de9cf809bd274ca73bab4af5a6e9c76a4bc09e76eae8991ef5ece45a" + + // drandSeedLag: the panel draws from the first beacon strictly after + // created+lag, so the randomness provably does not exist yet when the + // bounty is posted. + drandSeedLag = 30 * time.Second +) + +var defaultDrandRelays = []string{"https://api.drand.sh", "https://drand.cloudflare.com"} + +type drandSeedSource struct { + relays []string + client *http.Client +} + +// newDrandSeedSource builds the quicknet-backed source. relays == nil reads +// OBOL_BOUNTY_DRAND_URLS, then falls back to the public defaults. +func newDrandSeedSource(relays []string) *drandSeedSource { + if len(relays) == 0 { + if env := os.Getenv(drandRelaysEnv); env != "" { + for _, u := range strings.Split(env, ",") { + if u = strings.TrimSpace(u); u != "" { + relays = append(relays, strings.TrimRight(u, "/")) + } + } + } + } + if len(relays) == 0 { + relays = defaultDrandRelays + } + return &drandSeedSource{ + relays: relays, + client: &http.Client{Timeout: 10 * time.Second}, + } +} + +// drandBeacon is the relay response on the chain-hash path +// (/<chain-hash>/public/<round>). Randomness is present on this path, but we +// recompute and cross-check it from the signature anyway. 
+type drandBeacon struct { + Round uint64 `json:"round"` + Randomness string `json:"randomness"` + Signature string `json:"signature"` +} + +// drandRoundAfter returns the first quicknet round emitted STRICTLY after t. +// Round r is emitted at genesis + (r-1)×period. +func drandRoundAfter(t time.Time) uint64 { + d := t.Unix() - quicknetGenesisUnix + if d < 0 { + return 1 + } + return uint64(d/quicknetPeriodSec) + 2 +} + +func (s *drandSeedSource) Seed(ctx context.Context, uid string, created time.Time) ([32]byte, monetizeapi.ServiceBountyPanelSeed, error) { + round := drandRoundAfter(created.Add(drandSeedLag)) + + var errs []error + for _, relay := range s.relays { + beacon, err := s.fetch(ctx, relay, round) + if err != nil { + errs = append(errs, fmt.Errorf("%s: %w", relay, err)) + continue + } + randomness, err := verifyQuicknetBeacon(beacon, round) + if err != nil { + // A relay serving a beacon that fails BLS verification is lying + // or corrupted — surface it, never trust it. + errs = append(errs, fmt.Errorf("%s: %w", relay, err)) + continue + } + seed := sha256.Sum256(append([]byte(uid), randomness...)) + return seed, monetizeapi.ServiceBountyPanelSeed{ + Source: seedSourceDrand, + Round: round, + Randomness: hex.EncodeToString(randomness), + Signature: beacon.Signature, + }, nil + } + // No silent fallback to the local seed — a broken relay must never become + // a seed-grinding lever. The caller leaves the panel unselected and the + // controller requeues. 
+ return [32]byte{}, monetizeapi.ServiceBountyPanelSeed{}, fmt.Errorf("drand round %d unavailable from all relays: %w", round, errors.Join(errs...)) +} + +func (s *drandSeedSource) fetch(ctx context.Context, relay string, round uint64) (*drandBeacon, error) { + url := fmt.Sprintf("%s/%s/public/%d", strings.TrimRight(relay, "/"), quicknetChainHash, round) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("status %d", resp.StatusCode) + } + var beacon drandBeacon + if err := json.NewDecoder(resp.Body).Decode(&beacon); err != nil { + return nil, fmt.Errorf("decode beacon: %w", err) + } + return &beacon, nil +} + +// verifyQuicknetBeacon BLS-verifies the beacon signature against the quicknet +// group key (scheme bls-unchained-g1-rfc9380: signature on G1, key on G2, +// message = sha256(big-endian round)) and returns the verified randomness +// (sha256 of the signature). 
+func verifyQuicknetBeacon(beacon *drandBeacon, wantRound uint64) ([]byte, error) { + if beacon.Round != wantRound { + return nil, fmt.Errorf("relay returned round %d, want %d", beacon.Round, wantRound) + } + sig, err := hex.DecodeString(beacon.Signature) + if err != nil { + return nil, fmt.Errorf("decode signature: %w", err) + } + + suite := bls12381.NewBLS12381Suite() + pubBytes, err := hex.DecodeString(quicknetPublicKeyHex) + if err != nil { + return nil, fmt.Errorf("decode quicknet group key: %w", err) + } + pub := suite.G2().Point() + if err := pub.UnmarshalBinary(pubBytes); err != nil { + return nil, fmt.Errorf("unmarshal quicknet group key: %w", err) + } + + var roundBytes [8]byte + binary.BigEndian.PutUint64(roundBytes[:], beacon.Round) + msg := sha256.Sum256(roundBytes[:]) + // bdn over the deprecated sign/bls: identical single-signature Verify; + // the bls deprecation concerns rogue-key attacks on AGGREGATION, which a + // fixed group key + single beacon signature never exercises. 
+ if err := bdn.NewSchemeOnG1(suite).Verify(pub, msg[:], sig); err != nil { + return nil, fmt.Errorf("BLS verify round %d: %w", beacon.Round, err) + } + + randomness := sha256.Sum256(sig) + if beacon.Randomness != "" && !strings.EqualFold(beacon.Randomness, hex.EncodeToString(randomness[:])) { + return nil, fmt.Errorf("relay randomness does not match sha256(signature) for round %d", beacon.Round) + } + return randomness[:], nil +} diff --git a/internal/serviceoffercontroller/seed_test.go b/internal/serviceoffercontroller/seed_test.go new file mode 100644 index 00000000..94b37616 --- /dev/null +++ b/internal/serviceoffercontroller/seed_test.go @@ -0,0 +1,214 @@ +package serviceoffercontroller + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "net/http" + "net/http/httptest" + "reflect" + "strings" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" +) + +// canonicalAddress is the one true (EIP-55) form used for panel/exclusion +// keys throughout the eval market. +func canonicalAddress(addr string) string { + return common.HexToAddress(addr).Hex() +} + +// failingSeedSource simulates an unreachable / lying drand relay set. +type failingSeedSource struct{ calls int } + +func (f *failingSeedSource) Seed(context.Context, string, time.Time) ([32]byte, monetizeapi.ServiceBountyPanelSeed, error) { + f.calls++ + return [32]byte{}, monetizeapi.ServiceBountyPanelSeed{}, errors.New("relay down") +} + +// Real quicknet beacon, recorded once from +// https://api.drand.sh/52db9b…/public/1000 (2026-06-10) and BLS-verified at +// recording time. Round 1000 is emitted at genesis + 999×3s = 1692806364. 
+const ( + fixtureRound = uint64(1000) + fixtureSignature = "b44679b9a59af2ec876b1a6b1ad52ea9b1615fc3982b19576350f93447cb1125e342b73a8dd2bacbe47e4b6b63ed5e39" + fixtureRandomness = "fe290beca10872ef2fb164d2aa4442de4566183ec51c56ff3cd603d930e54fdd" + // fixtureCreatedUnix +30s lag = 1692806361 → first round strictly after + // is round 1000 (emitted at 1692806364). + fixtureCreatedUnix = int64(1692806331) +) + +func fixtureRelay(t *testing.T, body string, status int) *httptest.Server { + t.Helper() + wantPath := fmt.Sprintf("/%s/public/%d", quicknetChainHash, fixtureRound) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != wantPath { + t.Errorf("relay fetched %s, want %s", r.URL.Path, wantPath) + http.NotFound(w, r) + return + } + w.WriteHeader(status) + fmt.Fprint(w, body) + })) + t.Cleanup(server.Close) + return server +} + +func fixtureBody(round uint64, randomness, signature string) string { + return fmt.Sprintf(`{"round":%d,"randomness":"%s","signature":"%s"}`, round, randomness, signature) +} + +func TestLocalSeedSource_ProvenancePinned(t *testing.T) { + seed, provenance, err := localSeedSource{}.Seed(context.Background(), "uid-42", time.Now()) + if err != nil { + t.Fatal(err) + } + if want := sha256.Sum256([]byte("uid-42")); seed != want { + t.Fatalf("local seed must be exactly sha256(uid): got %x want %x", seed, want) + } + if want := (monetizeapi.ServiceBountyPanelSeed{Source: "local"}); !reflect.DeepEqual(provenance, want) { + t.Fatalf("local provenance = %+v, want %+v", provenance, want) + } +} + +func TestNewSeedSource_EnvModeSelection(t *testing.T) { + t.Setenv(seedModeEnv, "drand") + if _, ok := newSeedSource().(*drandSeedSource); !ok { + t.Fatal("OBOL_BOUNTY_SEED=drand must select the drand source") + } + t.Setenv(seedModeEnv, "") + if _, ok := newSeedSource().(localSeedSource); !ok { + t.Fatal("unset/other OBOL_BOUNTY_SEED must select the local source") + } + t.Setenv(seedModeEnv, 
"anything-else") + if _, ok := newSeedSource().(localSeedSource); !ok { + t.Fatal("unrecognized OBOL_BOUNTY_SEED must select the local source") + } +} + +func TestNewDrandSeedSource_RelayEnvOverride(t *testing.T) { + t.Setenv(drandRelaysEnv, " https://relay-a.example/ ,https://relay-b.example") + src := newDrandSeedSource(nil) + want := []string{"https://relay-a.example", "https://relay-b.example"} + if !reflect.DeepEqual(src.relays, want) { + t.Fatalf("relays = %v, want %v", src.relays, want) + } + t.Setenv(drandRelaysEnv, "") + if src := newDrandSeedSource(nil); !reflect.DeepEqual(src.relays, defaultDrandRelays) { + t.Fatalf("relays = %v, want defaults %v", src.relays, defaultDrandRelays) + } +} + +func TestDrandRoundAfter(t *testing.T) { + genesis := time.Unix(quicknetGenesisUnix, 0) + cases := []struct { + t time.Time + want uint64 + }{ + {genesis.Add(-time.Hour), 1}, // before genesis → first beacon + {genesis, 2}, // round 1 is AT genesis, not strictly after + {genesis.Add(1 * time.Second), 2}, // round 2 at genesis+3s + {genesis.Add(3 * time.Second), 3}, // exactly on round 2 → next + {time.Unix(fixtureCreatedUnix+30, 0), fixtureRound}, // the fixture anchor + } + for _, c := range cases { + if got := drandRoundAfter(c.t); got != c.want { + t.Errorf("drandRoundAfter(%s) = %d, want %d", c.t, got, c.want) + } + } +} + +func TestDrandSeedSource_RealFixtureVerifies(t *testing.T) { + server := fixtureRelay(t, fixtureBody(fixtureRound, fixtureRandomness, fixtureSignature), http.StatusOK) + src := newDrandSeedSource([]string{server.URL}) + + seed, provenance, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)) + if err != nil { + t.Fatalf("Seed on the recorded quicknet beacon must verify: %v", err) + } + if provenance.Source != "drand" || provenance.Round != fixtureRound || + provenance.Randomness != fixtureRandomness || provenance.Signature != fixtureSignature { + t.Fatalf("provenance = %+v, want the recorded beacon", provenance) + } + 
randomness, _ := hex.DecodeString(fixtureRandomness) + if want := sha256.Sum256(append([]byte("uid-7"), randomness...)); seed != want { + t.Fatalf("seed = %x, want sha256(uid || randomness) = %x", seed, want) + } +} + +func TestDrandSeedSource_FlippedSignatureBitFails(t *testing.T) { + // Flip one bit in the last signature byte: 0x39 → 0x38. + tampered := fixtureSignature[:len(fixtureSignature)-1] + "8" + tamperedRandomness := sha256.Sum256(mustHex(t, tampered)) + server := fixtureRelay(t, fixtureBody(fixtureRound, hex.EncodeToString(tamperedRandomness[:]), tampered), http.StatusOK) + src := newDrandSeedSource([]string{server.URL}) + + _, _, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)) + if err == nil { + t.Fatal("a flipped signature bit must fail BLS verification") + } + if !strings.Contains(err.Error(), "BLS verify") { + t.Fatalf("error must come from BLS verification, got: %v", err) + } +} + +func TestDrandSeedSource_TamperedRandomnessFails(t *testing.T) { + tampered := "ff" + fixtureRandomness[2:] + server := fixtureRelay(t, fixtureBody(fixtureRound, tampered, fixtureSignature), http.StatusOK) + src := newDrandSeedSource([]string{server.URL}) + + if _, _, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)); err == nil { + t.Fatal("relay randomness that is not sha256(signature) must be rejected") + } +} + +func TestDrandSeedSource_WrongRoundFails(t *testing.T) { + server := fixtureRelay(t, fixtureBody(fixtureRound+1, fixtureRandomness, fixtureSignature), http.StatusOK) + src := newDrandSeedSource([]string{server.URL}) + + if _, _, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)); err == nil { + t.Fatal("a relay answering with the wrong round must be rejected") + } +} + +func TestDrandSeedSource_AllRelaysDownErrorsNoLocalFallback(t *testing.T) { + server := fixtureRelay(t, `{"error":"boom"}`, http.StatusInternalServerError) + src := 
newDrandSeedSource([]string{server.URL, server.URL}) + + seed, provenance, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)) + if err == nil { + t.Fatal("drand mode must surface relay failure — NEVER fall back to the local seed") + } + if seed != ([32]byte{}) || provenance.Source != "" { + t.Fatalf("failure must return zero seed/provenance, got %x / %+v", seed, provenance) + } +} + +func TestDrandSeedSource_SecondRelayServes(t *testing.T) { + down := fixtureRelay(t, "", http.StatusBadGateway) + up := fixtureRelay(t, fixtureBody(fixtureRound, fixtureRandomness, fixtureSignature), http.StatusOK) + src := newDrandSeedSource([]string{down.URL, up.URL}) + + _, provenance, err := src.Seed(context.Background(), "uid-7", time.Unix(fixtureCreatedUnix, 0)) + if err != nil { + t.Fatalf("second relay must serve when the first is down: %v", err) + } + if provenance.Round != fixtureRound { + t.Fatalf("provenance round = %d, want %d", provenance.Round, fixtureRound) + } +} + +func mustHex(t *testing.T, s string) []byte { + t.Helper() + b, err := hex.DecodeString(s) + if err != nil { + t.Fatal(err) + } + return b +} diff --git a/internal/serviceoffercontroller/skill.go b/internal/serviceoffercontroller/skill.go new file mode 100644 index 00000000..0bb9d938 --- /dev/null +++ b/internal/serviceoffercontroller/skill.go @@ -0,0 +1,120 @@ +package serviceoffercontroller + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "fmt" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +// reconcileSkillBundle validates a type=skill offer's bundle ConfigMap and, +// when the artifact checks out, renders the bundle-server children +// (meta ConfigMap + Deployment + Service) in the offer's namespace. 
+// +// Returns ok=true when the children were applied and the rest of the +// condition ladder (reconcileUpstream → PaymentGateReady → RoutePublished +// → Registered → Ready) should proceed unchanged. Returns ok=false with a +// nil error when the offer is not yet publishable; in that case status +// already carries UpstreamHealthy=False with one of the specific reasons: +// +// - InvalidSkillSpec — required spec.skill fields missing (defense +// in depth behind the CRD's CEL rule) +// - InvalidSkillUpstream — spec.upstream does not point at the +// controller-rendered bundle server. Anti-spoof: a skill offer may +// only ever advertise its own bundle server, so the sha256 surfaced +// in the 402 extra can never describe a different upstream. +// - BundleMissing — bundle ConfigMap or its binaryData key absent +// - BundleInvalid — binaryData is not decodable base64 +// - BundleTooLarge — compressed bytes exceed MaxSkillBundleBytes +// - BundleHashMismatch — sha256 of the bytes != spec.skill.sha256 +// +// Errors are only returned for transient API failures (the caller's +// rate-limited requeue handles those). 
+func (c *Controller) reconcileSkillBundle(ctx context.Context, status *monetizeapi.ServiceOfferStatus, offer *monetizeapi.ServiceOffer) (bool, error) {
+	// notReady records UpstreamHealthy=False with the given reason and
+	// reports "not publishable yet" (ok=false) without returning an error.
+	notReady := func(reason, message string) (bool, error) {
+		setCondition(status, "UpstreamHealthy", "False", reason, message)
+		return false, nil
+	}
+
+	spec := offer.Spec.Skill
+	for _, field := range []string{spec.Name, spec.Version, spec.SHA256, spec.BundleConfigMap} {
+		if field == "" {
+			return notReady("InvalidSkillSpec",
+				"type=skill offer requires spec.skill.name, .version, .sha256 and .bundleConfigMap")
+		}
+	}
+
+	// Anti-spoof guard: the upstream has to be the bundle server rendered
+	// below — same Service name, the offer's own namespace, the fixed port.
+	bundleServer := monetizeapi.SkillBundleWorkloadName(offer.Name)
+	upstreamPinned := offer.Spec.Upstream.Service == bundleServer &&
+		offer.EffectiveNamespace() == offer.Namespace &&
+		offer.EffectivePort() == skillBundlePort
+	if !upstreamPinned {
+		return notReady("InvalidSkillUpstream",
+			fmt.Sprintf("type=skill offers must use the controller-rendered bundle server %s.%s:%d as upstream", bundleServer, offer.Namespace, skillBundlePort))
+	}
+
+	// Only NotFound is a validation verdict; other API errors go to the caller.
+	cm, err := c.configMaps.Namespace(offer.Namespace).Get(ctx, spec.BundleConfigMap, metav1.GetOptions{})
+	switch {
+	case apierrors.IsNotFound(err):
+		return notReady("BundleMissing",
+			fmt.Sprintf("bundle ConfigMap %s/%s not found", offer.Namespace, spec.BundleConfigMap))
+	case err != nil:
+		return false, err
+	}
+
+	// A lookup error, an absent key and an empty value are all BundleMissing.
+	b64, present, err := unstructured.NestedString(cm.Object, "binaryData", monetizeapi.SkillBundleKey)
+	if err != nil || !present || b64 == "" {
+		return notReady("BundleMissing",
+			fmt.Sprintf("bundle ConfigMap %s/%s has no binaryData[%q]", offer.Namespace, spec.BundleConfigMap, monetizeapi.SkillBundleKey))
+	}
+
+	payload, err := base64.StdEncoding.DecodeString(b64)
+	if err != nil {
+		return notReady("BundleInvalid",
+			fmt.Sprintf("bundle ConfigMap %s/%s binaryData[%q] is not valid base64: %v", offer.Namespace, spec.BundleConfigMap, monetizeapi.SkillBundleKey, err))
+	}
+
+	if size := len(payload); size > monetizeapi.MaxSkillBundleBytes {
+		return notReady("BundleTooLarge",
+			fmt.Sprintf("bundle is %d bytes; the cap is %d bytes of compressed artifact", size, monetizeapi.MaxSkillBundleBytes))
+	}
+
+	// Hex case is ignored when comparing the digest with spec.skill.sha256.
+	digest := sha256.Sum256(payload)
+	if actual := hex.EncodeToString(digest[:]); !strings.EqualFold(actual, spec.SHA256) {
+		return notReady("BundleHashMismatch",
+			fmt.Sprintf("bundle sha256 %s does not match spec.skill.sha256 %s", actual, strings.ToLower(spec.SHA256)))
+	}
+
+	metaCM, err := buildSkillBundleMetaConfigMap(offer)
+	if err != nil {
+		return false, err
+	}
+
+	// applyAgentObject (get-or-create-or-update) rather than the SSA applyObject,
+	// so unit tests exercise the same path through the fake dynamic client.
+	children := []*unstructured.Unstructured{metaCM, buildSkillBundleDeployment(offer), buildSkillBundleService(offer)}
+	for _, child := range children {
+		if applyErr := c.applyAgentObject(ctx, c.resourceFor(child), child); applyErr != nil {
+			setCondition(status, "UpstreamHealthy", "False", "ApplyFailed", applyErr.Error())
+			return false, applyErr
+		}
+	}
+
+	// UpstreamHealthy is left to the shared reconcileUpstream, which probes the
+	// bundle Service (http://so-<offer>-bundle.<namespace>.svc:8080/skill.json).
+	return true, nil
+}
diff --git a/internal/serviceoffercontroller/skill_render.go b/internal/serviceoffercontroller/skill_render.go
new file mode 100644
index 00000000..9a4a6869
--- /dev/null
+++ b/internal/serviceoffercontroller/skill_render.go
@@ -0,0 +1,226 @@
+package serviceoffercontroller
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+)
+
+// skillBundlePort is the fixed port the controller-rendered skill bundle
+// server listens on. The CLI pins spec.upstream.port to this value and
+// reconcileSkillBundle rejects anything else (anti-spoof guard — see the
+// InvalidSkillUpstream branch).
+const skillBundlePort = int64(8080)
+
+// skillBundleHTTPDConf maps the two file extensions the bundle server
+// serves to their MIME types (busybox httpd /etc/httpd.conf format).
+const skillBundleHTTPDConf = ".tar.gz:application/gzip\n.json:application/json\n"
+
+// skillBundleMetaName returns the name of the controller-rendered metadata
+// ConfigMap (skill.json + httpd.conf) that sits next to the operator's
+// bundle ConfigMap. Equals SkillBundleWorkloadName(offerName)+"-meta" for
+// every name that fits the 253-char DNS-subdomain limit; pathological
+// names go through the shared safeName truncate+hash fallback instead of
+// blindly appending past the limit.
+func skillBundleMetaName(offerName string) string {
+	return safeName("so-", offerName, "-bundle-meta")
+}
+
+// skillBundleLabels is the shared label set for the bundle server children
+// (Deployment selector/template, Service selector, meta ConfigMap). Same
+// shape as agentIdentityLabels / the skill catalog labels.
+func skillBundleLabels(offer *monetizeapi.ServiceOffer) map[string]any {
+	labels := make(map[string]any, 2)
+	labels["app"] = monetizeapi.SkillBundleWorkloadName(offer.Name)
+	labels["obol.org/managed-by"] = "serviceoffer-controller"
+	return labels
+}
+
+// skillBundleDocument is the JSON descriptor published as /skill.json
+// beside the artifact. The same path is the upstream health-check target
+// (the CLI pins spec.upstream.healthPath to it), so UpstreamHealthy turns
+// True only once the bundle server really serves this descriptor.
+type skillBundleDocument struct {
+	Name        string `json:"name"`
+	Version     string `json:"version"`
+	SHA256      string `json:"sha256"`
+	DisplayName string `json:"displayName,omitempty"`
+	Description string `json:"description,omitempty"`
+	Offer       string `json:"offer"`
+	Namespace   string `json:"namespace"`
+}
+
+// buildSkillBundleJSON renders the offer's skill.json descriptor as indented JSON.
+func buildSkillBundleJSON(offer *monetizeapi.ServiceOffer) (string, error) {
+	skill := offer.Spec.Skill
+	encoded, err := json.MarshalIndent(skillBundleDocument{
+		Name:        skill.Name,
+		Version:     skill.Version,
+		SHA256:      strings.ToLower(skill.SHA256),
+		DisplayName: skill.DisplayName,
+		Description: skill.Description,
+		Offer:       offer.Name,
+		Namespace:   offer.Namespace,
+	}, "", " ")
+	if err != nil {
+		return "", fmt.Errorf("marshal skill.json for %s/%s: %w", offer.Namespace, offer.Name, err)
+	}
+	return string(encoded), nil
+}
+
+// buildSkillBundleMetaConfigMap renders the controller-owned metadata
+// ConfigMap mounted into the bundle server: skill.json (descriptor +
+// health target) and httpd.conf (MIME map). Owner-referenced to the offer
+// so GC removes it when the offer is deleted.
+func buildSkillBundleMetaConfigMap(offer *monetizeapi.ServiceOffer) (*unstructured.Unstructured, error) {
+	descriptor, err := buildSkillBundleJSON(offer)
+	if err != nil {
+		return nil, err
+	}
+
+	// Named after the offer, placed in its namespace and owner-referenced
+	// to it; labelled with the shared bundle-server label set.
+	metadata := map[string]any{
+		"name":            skillBundleMetaName(offer.Name),
+		"namespace":       offer.Namespace,
+		"ownerReferences": []any{ownerRefMap(offer)},
+		"labels":          skillBundleLabels(offer),
+	}
+
+	return &unstructured.Unstructured{Object: map[string]any{
+		"apiVersion": "v1",
+		"kind":       "ConfigMap",
+		"metadata":   metadata,
+		"data":       map[string]any{"skill.json": descriptor, "httpd.conf": skillBundleHTTPDConf},
+	}}, nil
+}
+
+// buildSkillBundleDeployment renders the static bundle server: a busybox
+// httpd serving /www/bundle.tar.gz (projected from the operator's bundle
+// ConfigMap) and /www/skill.json (projected from the meta ConfigMap).
+// Restricted-PSS securityContext copied from the skill catalog /
+// agentidentity httpd pattern — the same admission profile applies to any
+// namespace that enforces Restricted PSS, and there is no reason for a
+// static file server to run with more privilege.
+//
+// The pod template carries obol.org/content-hash = spec.skill.sha256[:8]
+// so re-publishing a bundle (new hash) rolls the pod even though the
+// Deployment spec is otherwise unchanged.
+func buildSkillBundleDeployment(offer *monetizeapi.ServiceOffer) *unstructured.Unstructured {
+	name := monetizeapi.SkillBundleWorkloadName(offer.Name)
+	metaName := skillBundleMetaName(offer.Name)
+	labels := skillBundleLabels(offer)
+
+	// Pod-template annotation value: the lowercased spec hash cut to at most
+	// 8 characters, so a new bundle hash changes the pod template.
+	contentHash := strings.ToLower(offer.Spec.Skill.SHA256)
+	if len(contentHash) > 8 {
+		contentHash = contentHash[:8]
+	}
+
+	// The httpd listen port is derived from skillBundlePort — the same
+	// constant used for containerPort here and for the Service ports — so
+	// the three can never drift apart if the constant changes.
+	container := map[string]any{
+		"name":            "httpd",
+		"image":           "busybox:1.36",
+		"command":         []any{"httpd", "-f", "-p", fmt.Sprintf("%d", skillBundlePort), "-h", "/www"},
+		"securityContext": restrictedContainerSecurityContext(),
+		"ports": []any{
+			map[string]any{"containerPort": skillBundlePort, "protocol": "TCP"},
+		},
+		"volumeMounts": []any{
+			map[string]any{"name": "content", "mountPath": "/www", "readOnly": true},
+			map[string]any{"name": "httpdconf", "mountPath": "/etc/httpd.conf", "subPath": "httpd.conf", "readOnly": true},
+		},
+		"resources": map[string]any{
+			"requests": map[string]any{"cpu": "5m", "memory": "8Mi"},
+			"limits":   map[string]any{"cpu": "50m", "memory": "32Mi"},
+		},
+	}
+
+	// Single projected volume so both ConfigMaps land in the same /www
+	// docroot (two configMap volumes cannot share a mountPath): the
+	// operator's bundle artifact plus the controller-rendered skill.json.
+	contentVolume := map[string]any{
+		"name": "content",
+		"projected": map[string]any{
+			"sources": []any{
+				map[string]any{"configMap": map[string]any{
+					"name":  offer.Spec.Skill.BundleConfigMap,
+					"items": []any{map[string]any{"key": monetizeapi.SkillBundleKey, "path": monetizeapi.SkillBundleKey}},
+				}},
+				map[string]any{"configMap": map[string]any{
+					"name":  metaName,
+					"items": []any{map[string]any{"key": "skill.json", "path": "skill.json"}},
+				}},
+			},
+		},
+	}
+	confVolume := map[string]any{
+		"name": "httpdconf",
+		"configMap": map[string]any{
+			"name":  metaName,
+			"items": []any{map[string]any{"key": "httpd.conf", "path": "httpd.conf"}},
+		},
+	}
+
+	return &unstructured.Unstructured{
+		Object: map[string]any{
+			"apiVersion": "apps/v1",
+			"kind":       "Deployment",
+			"metadata": map[string]any{
+				"name":            name,
+				"namespace":       offer.Namespace,
+				"ownerReferences": []any{ownerRefMap(offer)},
+				"labels":          labels,
+			},
+			"spec": map[string]any{
+				"replicas": int64(1),
+				"selector": map[string]any{"matchLabels": labels},
+				"template": map[string]any{
+					"metadata": map[string]any{
+						"labels":      labels,
+						"annotations": map[string]any{"obol.org/content-hash": contentHash},
+					},
+					"spec": map[string]any{
+						"securityContext": restrictedPodSecurityContext(),
+						"containers":      []any{container},
+						"volumes":         []any{contentVolume, confVolume},
+					},
+				},
+			},
+		},
+	}
+}
+
+// buildSkillBundleService renders the ClusterIP Service in front of the
+// bundle server. Its name is the deterministic upstream the CLI pins into
+// spec.upstream.service, which is how the existing reconcileUpstream and
+// routeRuleFromOffer paths work unchanged for type=skill offers.
+func buildSkillBundleService(offer *monetizeapi.ServiceOffer) *unstructured.Unstructured {
+	workload := monetizeapi.SkillBundleWorkloadName(offer.Name)
+	selector := skillBundleLabels(offer)
+	// One TCP port; Service port and targetPort are both skillBundlePort.
+	port := map[string]any{"port": skillBundlePort, "targetPort": skillBundlePort, "protocol": "TCP"}
+	metadata := map[string]any{
+		"name":            workload,
+		"namespace":       offer.Namespace,
+		"ownerReferences": []any{ownerRefMap(offer)},
+		"labels":          selector,
+	}
+	spec := map[string]any{
+		"type":     "ClusterIP",
+		"selector": selector,
+		"ports":    []any{port},
+	}
+	return &unstructured.Unstructured{Object: map[string]any{
+		"apiVersion": "v1",
+		"kind":       "Service",
+		"metadata":   metadata,
+		"spec":       spec,
+	}}
+}
diff --git a/internal/serviceoffercontroller/skill_render_test.go b/internal/serviceoffercontroller/skill_render_test.go
new file mode 100644
index 00000000..a400e07a
--- /dev/null
+++ b/internal/serviceoffercontroller/skill_render_test.go
@@ -0,0 +1,233 @@
+package serviceoffercontroller
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/ObolNetwork/obol-stack/internal/monetizeapi"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+)
+
+func TestBuildSkillBundleDeployment_RestrictedPSS(t *testing.T) {
+	offer := skillTestOffer(nil)
+	dep := buildSkillBundleDeployment(offer)
+
+	podSpec, found, err := unstructured.NestedMap(dep.Object, "spec", "template", "spec")
+	if err != nil || !found {
+		t.Fatalf("pod spec missing: found=%v err=%v", found, err)
+	}
+
+	// Pod-level Restricted PSS — same assertions as the skill catalog /
+	// agentidentity httpd renders.
+ sc, ok := podSpec["securityContext"].(map[string]any) + if !ok { + t.Fatal("pod securityContext missing") + } + if sc["runAsNonRoot"] != true { + t.Errorf("runAsNonRoot = %v, want true", sc["runAsNonRoot"]) + } + if sc["runAsUser"] != int64(1000) || sc["runAsGroup"] != int64(1000) || sc["fsGroup"] != int64(1000) { + t.Errorf("uid/gid/fsGroup = %v/%v/%v, want 1000", sc["runAsUser"], sc["runAsGroup"], sc["fsGroup"]) + } + seccomp, _ := sc["seccompProfile"].(map[string]any) + if seccomp == nil || seccomp["type"] != "RuntimeDefault" { + t.Errorf("seccompProfile = %v, want RuntimeDefault", sc["seccompProfile"]) + } + + containers, _ := podSpec["containers"].([]any) + if len(containers) != 1 { + t.Fatalf("containers = %d, want 1", len(containers)) + } + container := containers[0].(map[string]any) + if container["image"] != "busybox:1.36" { + t.Errorf("image = %v, want busybox:1.36", container["image"]) + } + + csc, ok := container["securityContext"].(map[string]any) + if !ok { + t.Fatal("container securityContext missing") + } + if csc["allowPrivilegeEscalation"] != false { + t.Errorf("allowPrivilegeEscalation = %v, want false", csc["allowPrivilegeEscalation"]) + } + caps, _ := csc["capabilities"].(map[string]any) + drop, _ := caps["drop"].([]any) + if len(drop) != 1 || drop[0] != "ALL" { + t.Errorf("capabilities.drop = %v, want [ALL]", drop) + } + + command, _ := container["command"].([]any) + wantCommand := []any{"httpd", "-f", "-p", "8080", "-h", "/www"} + if len(command) != len(wantCommand) { + t.Fatalf("command = %v, want %v", command, wantCommand) + } + for i := range wantCommand { + if command[i] != wantCommand[i] { + t.Errorf("command[%d] = %v, want %v", i, command[i], wantCommand[i]) + } + } + + resources, _ := container["resources"].(map[string]any) + requests, _ := resources["requests"].(map[string]any) + limits, _ := resources["limits"].(map[string]any) + if requests["cpu"] != "5m" || requests["memory"] != "8Mi" { + t.Errorf("requests = %v, want 5m/8Mi", 
requests) + } + if limits["cpu"] != "50m" || limits["memory"] != "32Mi" { + t.Errorf("limits = %v, want 50m/32Mi", limits) + } +} + +func TestBuildSkillBundleDeployment_VolumesWireBothConfigMaps(t *testing.T) { + offer := skillTestOffer(nil) + dep := buildSkillBundleDeployment(offer) + + volumes, found, err := unstructured.NestedSlice(dep.Object, "spec", "template", "spec", "volumes") + if err != nil || !found || len(volumes) != 2 { + t.Fatalf("volumes = %v (found=%v err=%v), want 2 entries", volumes, found, err) + } + + content := volumes[0].(map[string]any) + if content["name"] != "content" { + t.Fatalf("volumes[0] = %v, want content", content["name"]) + } + projected, _ := content["projected"].(map[string]any) + sources, _ := projected["sources"].([]any) + if len(sources) != 2 { + t.Fatalf("projected sources = %d, want 2 (bundle CM + meta CM)", len(sources)) + } + bundleCM := sources[0].(map[string]any)["configMap"].(map[string]any) + if bundleCM["name"] != offer.Spec.Skill.BundleConfigMap { + t.Errorf("bundle source CM = %v, want %s", bundleCM["name"], offer.Spec.Skill.BundleConfigMap) + } + bundleItems, _ := bundleCM["items"].([]any) + if len(bundleItems) != 1 { + t.Fatalf("bundle items = %v", bundleItems) + } + item := bundleItems[0].(map[string]any) + if item["key"] != monetizeapi.SkillBundleKey || item["path"] != monetizeapi.SkillBundleKey { + t.Errorf("bundle item = %v, want %s→%s", item, monetizeapi.SkillBundleKey, monetizeapi.SkillBundleKey) + } + metaCM := sources[1].(map[string]any)["configMap"].(map[string]any) + if metaCM["name"] != skillBundleMetaName(offer.Name) { + t.Errorf("meta source CM = %v, want %s", metaCM["name"], skillBundleMetaName(offer.Name)) + } + + httpdconf := volumes[1].(map[string]any) + if httpdconf["name"] != "httpdconf" { + t.Fatalf("volumes[1] = %v, want httpdconf", httpdconf["name"]) + } + + mounts, _, _ := unstructured.NestedSlice(dep.Object, "spec", "template", "spec", "containers") + container := mounts[0].(map[string]any) 
+ volumeMounts, _ := container["volumeMounts"].([]any) + var sawContent, sawConf bool + for _, vm := range volumeMounts { + m := vm.(map[string]any) + switch m["name"] { + case "content": + sawContent = m["mountPath"] == "/www" && m["readOnly"] == true + case "httpdconf": + sawConf = m["mountPath"] == "/etc/httpd.conf" && m["subPath"] == "httpd.conf" + } + } + if !sawContent { + t.Error("content volume must be mounted read-only at /www") + } + if !sawConf { + t.Error("httpd.conf must be subPath-mounted at /etc/httpd.conf") + } +} + +func TestBuildSkillBundleService_SelectorMatchesDeploymentLabels(t *testing.T) { + offer := skillTestOffer(nil) + svc := buildSkillBundleService(offer) + dep := buildSkillBundleDeployment(offer) + + selector, _, _ := unstructured.NestedMap(svc.Object, "spec", "selector") + podLabels, _, _ := unstructured.NestedMap(dep.Object, "spec", "template", "metadata", "labels") + if len(selector) == 0 || len(selector) != len(podLabels) { + t.Fatalf("selector = %v, pod labels = %v", selector, podLabels) + } + for k, v := range selector { + if podLabels[k] != v { + t.Errorf("selector[%s] = %v, pod label = %v", k, v, podLabels[k]) + } + } + + if svc.GetName() != monetizeapi.SkillBundleWorkloadName(offer.Name) { + t.Errorf("service name = %q, want %q", svc.GetName(), monetizeapi.SkillBundleWorkloadName(offer.Name)) + } + ports, _, _ := unstructured.NestedSlice(svc.Object, "spec", "ports") + if len(ports) != 1 { + t.Fatalf("ports = %v", ports) + } + port := ports[0].(map[string]any) + if port["port"] != int64(8080) || port["targetPort"] != int64(8080) { + t.Errorf("port = %v, want 8080→8080", port) + } + if svcType, _, _ := unstructured.NestedString(svc.Object, "spec", "type"); svcType != "ClusterIP" { + t.Errorf("service type = %q, want ClusterIP", svcType) + } +} + +func TestBuildSkillBundleMetaConfigMap_Content(t *testing.T) { + offer := skillTestOffer(nil) + cm, err := buildSkillBundleMetaConfigMap(offer) + if err != nil { + 
t.Fatalf("buildSkillBundleMetaConfigMap: %v", err) + } + + if cm.GetName() != skillBundleMetaName(offer.Name) { + t.Errorf("name = %q, want %q", cm.GetName(), skillBundleMetaName(offer.Name)) + } + if cm.GetNamespace() != offer.Namespace { + t.Errorf("namespace = %q, want %q", cm.GetNamespace(), offer.Namespace) + } + owners := cm.GetOwnerReferences() + if len(owners) != 1 || owners[0].Kind != monetizeapi.ServiceOfferKind || owners[0].Name != offer.Name { + t.Errorf("ownerReferences = %+v, want single ServiceOffer/%s owner", owners, offer.Name) + } + + httpdConf, _, _ := unstructured.NestedString(cm.Object, "data", "httpd.conf") + if !strings.Contains(httpdConf, ".tar.gz:application/gzip") || !strings.Contains(httpdConf, ".json:application/json") { + t.Errorf("httpd.conf = %q, want gzip + json MIME entries", httpdConf) + } + + skillJSON, _, _ := unstructured.NestedString(cm.Object, "data", "skill.json") + var doc map[string]any + if err := json.Unmarshal([]byte(skillJSON), &doc); err != nil { + t.Fatalf("skill.json is not valid JSON: %v\n%s", err, skillJSON) + } + wants := map[string]string{ + "name": "buy-x402", + "version": "0.1.0", + "sha256": skillTestBundleHash(), + "displayName": "Buy x402", + "offer": offer.Name, + "namespace": offer.Namespace, + } + for key, want := range wants { + if doc[key] != want { + t.Errorf("skill.json[%s] = %v, want %q", key, doc[key], want) + } + } +} + +func TestSkillBundleMetaName_RespectsK8sNameLimit(t *testing.T) { + long := strings.Repeat("a", 300) + if wn := monetizeapi.SkillBundleWorkloadName(long); len(wn) > 63 { + t.Errorf("workload name length = %d, want <= 63 (Service name / app label limit)", len(wn)) + } + name := skillBundleMetaName(long) + if len(name) > 253 { + t.Errorf("meta name length = %d, want <= 253", len(name)) + } + if name != skillBundleMetaName(long) { + t.Error("meta name must be deterministic") + } + if short := skillBundleMetaName("buy-x402"); short != 
monetizeapi.SkillBundleWorkloadName("buy-x402")+"-meta" { + t.Errorf("short names must equal SkillBundleWorkloadName+\"-meta\", got %q", short) + } +} diff --git a/internal/serviceoffercontroller/skill_test.go b/internal/serviceoffercontroller/skill_test.go new file mode 100644 index 00000000..38a5458b --- /dev/null +++ b/internal/serviceoffercontroller/skill_test.go @@ -0,0 +1,352 @@ +package serviceoffercontroller + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic/fake" +) + +// newSkillTestController builds a Controller wired with the GVRs that +// reconcileSkillBundle touches, backed by the fake dynamic client (same +// harness style as newProvisioningTestController). +func newSkillTestController(t *testing.T, seedObjects ...*unstructured.Unstructured) *Controller { + t.Helper() + + objects := make([]runtime.Object, 0, len(seedObjects)) + for _, o := range seedObjects { + objects = append(objects, o) + } + + dynClient := fake.NewSimpleDynamicClientWithCustomListKinds( + runtime.NewScheme(), + map[schema.GroupVersionResource]string{ + monetizeapi.ConfigMapGVR: "ConfigMapList", + monetizeapi.ServiceGVR: "ServiceList", + monetizeapi.DeploymentGVR: "DeploymentList", + }, + objects..., + ) + + return &Controller{ + dynClient: dynClient, + client: dynClient, + services: dynClient.Resource(monetizeapi.ServiceGVR), + configMaps: dynClient.Resource(monetizeapi.ConfigMapGVR), + deployments: dynClient.Resource(monetizeapi.DeploymentGVR), + } +} + +// skillTestBundle is a stand-in for gzipped tar bytes; reconcileSkillBundle +// only hashes and measures them, it never unpacks. 
+var skillTestBundle = []byte("fake-gzipped-skill-bundle-bytes")
+
+// skillTestBundleHash returns the lowercase hex sha256 of skillTestBundle.
+func skillTestBundleHash() string {
+	digest := sha256.Sum256(skillTestBundle)
+	return hex.EncodeToString(digest[:])
+}
+
+// skillTestOffer builds a valid type=skill offer with its upstream pinned
+// to the controller-rendered bundle server, the way the CLI writes it.
+// A non-nil mutate is applied last so a table case can break one field.
+func skillTestOffer(mutate func(*monetizeapi.ServiceOffer)) *monetizeapi.ServiceOffer {
+	// Offer identity shared by the metadata and the pinned upstream.
+	const offerName, namespace = "buy-x402", "hermes-obol-agent"
+
+	offer := new(monetizeapi.ServiceOffer)
+	offer.ObjectMeta = metav1.ObjectMeta{Name: offerName, Namespace: namespace, UID: types.UID("offer-uid-1")}
+	offer.Spec.Type = "skill"
+	offer.Spec.Skill = monetizeapi.ServiceOfferSkill{
+		Name:            "buy-x402",
+		Version:         "0.1.0",
+		SHA256:          skillTestBundleHash(),
+		BundleConfigMap: "buy-x402-skill-bundle",
+		DisplayName:     "Buy x402",
+		Description:     "Pre-sign x402 payment authorizations",
+	}
+	// Upstream points at the bundle Service derived from the offer name.
+	offer.Spec.Upstream = monetizeapi.ServiceOfferUpstream{
+		Service:    monetizeapi.SkillBundleWorkloadName(offerName),
+		Namespace:  namespace,
+		Port:       8080,
+		HealthPath: "/skill.json",
+	}
+	offer.Spec.Payment = monetizeapi.ServiceOfferPayment{
+		PayTo:   "0x1111111111111111111111111111111111111111",
+		Network: "base-sepolia",
+		Price:   monetizeapi.ServiceOfferPriceTable{PerRequest: "0.01"},
+	}
+
+	if mutate == nil {
+		return offer
+	}
+	mutate(offer)
+	return offer
+}
+
+// bundleConfigMapObject renders the operator-supplied bundle ConfigMap the
+// way the apiserver stores it: binaryData values base64-encoded.
+func bundleConfigMapObject(namespace, name string, payload []byte) *unstructured.Unstructured { + return &unstructured.Unstructured{Object: map[string]any{ + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": map[string]any{ + "name": name, + "namespace": namespace, + }, + "binaryData": map[string]any{ + monetizeapi.SkillBundleKey: base64.StdEncoding.EncodeToString(payload), + }, + }} +} + +func conditionByType(status monetizeapi.ServiceOfferStatus, conditionType string) *monetizeapi.Condition { + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { + return &status.Conditions[i] + } + } + return nil +} + +func TestReconcileSkillBundle_FailureTable(t *testing.T) { + oversize := make([]byte, monetizeapi.MaxSkillBundleBytes+1) + + cases := []struct { + name string + mutate func(*monetizeapi.ServiceOffer) + seed []*unstructured.Unstructured + wantReason string + }{ + { + name: "missing bundle ConfigMap", + seed: nil, + wantReason: "BundleMissing", + }, + { + name: "ConfigMap without binaryData key", + seed: []*unstructured.Unstructured{{Object: map[string]any{ + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": map[string]any{ + "name": "buy-x402-skill-bundle", + "namespace": "hermes-obol-agent", + }, + "data": map[string]any{"unrelated": "value"}, + }}}, + wantReason: "BundleMissing", + }, + { + name: "bundle exceeds MaxSkillBundleBytes", + seed: []*unstructured.Unstructured{ + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", oversize), + }, + wantReason: "BundleTooLarge", + }, + { + name: "sha256 mismatch", + mutate: func(o *monetizeapi.ServiceOffer) { + o.Spec.Skill.SHA256 = strings.Repeat("ab", 32) + }, + seed: []*unstructured.Unstructured{ + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + }, + wantReason: "BundleHashMismatch", + }, + { + name: "spoofed upstream service", + mutate: func(o *monetizeapi.ServiceOffer) { + o.Spec.Upstream.Service = "litellm" + }, + 
seed: []*unstructured.Unstructured{ + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + }, + wantReason: "InvalidSkillUpstream", + }, + { + name: "spoofed upstream namespace", + mutate: func(o *monetizeapi.ServiceOffer) { + o.Spec.Upstream.Namespace = "llm" + }, + seed: []*unstructured.Unstructured{ + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + }, + wantReason: "InvalidSkillUpstream", + }, + { + name: "spoofed upstream port", + mutate: func(o *monetizeapi.ServiceOffer) { + o.Spec.Upstream.Port = 4000 + }, + seed: []*unstructured.Unstructured{ + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + }, + wantReason: "InvalidSkillUpstream", + }, + { + name: "missing required skill fields", + mutate: func(o *monetizeapi.ServiceOffer) { + o.Spec.Skill.SHA256 = "" + }, + wantReason: "InvalidSkillSpec", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + c := newSkillTestController(t, tc.seed...) + offer := skillTestOffer(tc.mutate) + status := monetizeapi.ServiceOfferStatus{} + + ok, err := c.reconcileSkillBundle(context.Background(), &status, offer) + if err != nil { + t.Fatalf("reconcileSkillBundle: %v", err) + } + if ok { + t.Fatal("ok = true, want false") + } + + cond := conditionByType(status, "UpstreamHealthy") + if cond == nil { + t.Fatalf("UpstreamHealthy condition not set: %+v", status.Conditions) + } + if cond.Status != "False" { + t.Errorf("UpstreamHealthy = %q, want False", cond.Status) + } + if cond.Reason != tc.wantReason { + t.Errorf("UpstreamHealthy reason = %q, want %q (message: %s)", cond.Reason, tc.wantReason, cond.Message) + } + + // No children may be published when validation fails. 
+ workload := monetizeapi.SkillBundleWorkloadName(offer.Name) + if resourceExists(t, c, "deployments", offer.Namespace, workload) { + t.Error("bundle Deployment must not be created on a failed validation") + } + if resourceExists(t, c, "services", offer.Namespace, workload) { + t.Error("bundle Service must not be created on a failed validation") + } + if resourceExists(t, c, "configmaps", offer.Namespace, skillBundleMetaName(offer.Name)) { + t.Error("meta ConfigMap must not be created on a failed validation") + } + }) + } +} + +func TestReconcileSkillBundle_HappyPathAppliesChildren(t *testing.T) { + c := newSkillTestController(t, + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + ) + offer := skillTestOffer(nil) + status := monetizeapi.ServiceOfferStatus{} + + ok, err := c.reconcileSkillBundle(context.Background(), &status, offer) + if err != nil { + t.Fatalf("reconcileSkillBundle: %v", err) + } + if !ok { + t.Fatalf("ok = false, want true; conditions: %+v", status.Conditions) + } + + // reconcileSkillBundle must NOT claim UpstreamHealthy itself — the + // shared reconcileUpstream health check owns that verdict. 
+ if cond := conditionByType(status, "UpstreamHealthy"); cond != nil { + t.Errorf("UpstreamHealthy should be left to reconcileUpstream, got %+v", cond) + } + + workload := monetizeapi.SkillBundleWorkloadName(offer.Name) + ctx := context.Background() + + dep, err := c.deployments.Namespace(offer.Namespace).Get(ctx, workload, metav1.GetOptions{}) + if err != nil { + t.Fatalf("bundle Deployment missing: %v", err) + } + owners := dep.GetOwnerReferences() + if len(owners) != 1 || owners[0].Kind != monetizeapi.ServiceOfferKind || owners[0].Name != offer.Name { + t.Errorf("Deployment ownerReferences = %+v, want single ServiceOffer/%s owner", owners, offer.Name) + } + hash, _, _ := unstructured.NestedString(dep.Object, "spec", "template", "metadata", "annotations", "obol.org/content-hash") + if want := skillTestBundleHash()[:8]; hash != want { + t.Errorf("content-hash annotation = %q, want %q", hash, want) + } + + if _, err := c.services.Namespace(offer.Namespace).Get(ctx, workload, metav1.GetOptions{}); err != nil { + t.Fatalf("bundle Service missing: %v", err) + } + meta, err := c.configMaps.Namespace(offer.Namespace).Get(ctx, skillBundleMetaName(offer.Name), metav1.GetOptions{}) + if err != nil { + t.Fatalf("meta ConfigMap missing: %v", err) + } + skillJSON, _, _ := unstructured.NestedString(meta.Object, "data", "skill.json") + for _, want := range []string{`"name": "buy-x402"`, `"version": "0.1.0"`, skillTestBundleHash()} { + if !strings.Contains(skillJSON, want) { + t.Errorf("skill.json missing %q:\n%s", want, skillJSON) + } + } +} + +func TestReconcileSkillBundle_HashCompareIsCaseInsensitive(t *testing.T) { + c := newSkillTestController(t, + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + ) + offer := skillTestOffer(func(o *monetizeapi.ServiceOffer) { + o.Spec.Skill.SHA256 = strings.ToUpper(skillTestBundleHash()) + }) + status := monetizeapi.ServiceOfferStatus{} + + ok, err := c.reconcileSkillBundle(context.Background(), 
&status, offer) + if err != nil { + t.Fatalf("reconcileSkillBundle: %v", err) + } + if !ok { + t.Fatalf("uppercase spec hash must still match (CRD enforces lowercase, controller stays lenient); conditions: %+v", status.Conditions) + } +} + +func TestReconcileSkillBundle_RepublishedBundleRollsContentHash(t *testing.T) { + c := newSkillTestController(t, + bundleConfigMapObject("hermes-obol-agent", "buy-x402-skill-bundle", skillTestBundle), + ) + offer := skillTestOffer(nil) + ctx := context.Background() + + status := monetizeapi.ServiceOfferStatus{} + if ok, err := c.reconcileSkillBundle(ctx, &status, offer); err != nil || !ok { + t.Fatalf("first reconcile: ok=%v err=%v", ok, err) + } + + // Operator re-publishes a new bundle: CM bytes + spec hash both move. + newBundle := []byte("v2-bundle-bytes") + newSum := sha256.Sum256(newBundle) + newHash := hex.EncodeToString(newSum[:]) + if _, err := c.configMaps.Namespace(offer.Namespace).Update(ctx, + bundleConfigMapObject(offer.Namespace, "buy-x402-skill-bundle", newBundle), metav1.UpdateOptions{}); err != nil { + t.Fatalf("update bundle CM: %v", err) + } + offer.Spec.Skill.SHA256 = newHash + offer.Spec.Skill.Version = "0.2.0" + + status = monetizeapi.ServiceOfferStatus{} + if ok, err := c.reconcileSkillBundle(ctx, &status, offer); err != nil || !ok { + t.Fatalf("second reconcile: ok=%v err=%v conditions=%+v", ok, err, status.Conditions) + } + + dep, err := c.deployments.Namespace(offer.Namespace).Get(ctx, monetizeapi.SkillBundleWorkloadName(offer.Name), metav1.GetOptions{}) + if err != nil { + t.Fatalf("bundle Deployment missing after re-publish: %v", err) + } + hash, _, _ := unstructured.NestedString(dep.Object, "spec", "template", "metadata", "annotations", "obol.org/content-hash") + if want := newHash[:8]; hash != want { + t.Errorf("content-hash after re-publish = %q, want %q (pod must roll)", hash, want) + } +} diff --git a/internal/skillpkg/bundle.go b/internal/skillpkg/bundle.go new file mode 100644 index 
00000000..c7a443a0 --- /dev/null +++ b/internal/skillpkg/bundle.go @@ -0,0 +1,241 @@ +// Package skillpkg packages a skill directory (SKILL.md + scripts, the +// same shape as internal/embed/skills/*) into a byte-for-byte +// deterministic gzipped tarball so the sha256 of the artifact is a +// stable identity for the skill content. The hash is what `obol sell +// skill` pins into the ServiceOffer spec and what `obol skills calldata +// set-hash` anchors on the ERC-8004 Identity Registry, so two packs of +// the same content MUST produce identical bytes regardless of file +// mtimes, ownership, umask, or on-disk creation order. +package skillpkg + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "crypto/sha256" + "encoding/hex" + "fmt" + "io/fs" + "path" + "sort" + "strings" + "time" +) + +const ( + // MaxBundleBytes caps the gzipped bundle size. It mirrors + // monetizeapi.MaxSkillBundleBytes (asserted equal in tests): the + // artifact rides a ConfigMap (1MiB object cap) and must leave room + // for base64 expansion plus object metadata, so the cap applies to + // the compressed bytes. Pack enforces it so no caller can persist + // an artifact the controller would refuse to publish. + MaxBundleBytes = 900000 + + // ManifestName is the required top-level file. A skill bundle + // without SKILL.md is not a skill. + ManifestName = "SKILL.md" +) + +// entry is one path collected from the source tree, pre-sorted and +// pre-classified so the tar emission loop is trivially deterministic. +type entry struct { + path string // slash-separated, relative to root + dir bool + exec bool // any exec bit set on the source file +} + +// Pack walks root, packs every regular file and directory into a +// deterministic USTAR tar wrapped in a deterministic gzip stream, and +// returns the compressed bytes plus their lowercase hex sha256. 
+// +// Determinism rules: +// - entries sorted lexicographically by slash-separated path +// - file modes normalized to 0644 (0755 when any source exec bit is +// set); directory modes normalized to 0755 +// - ModTime fixed to the Unix epoch; uid/gid 0; uname/gname cleared +// - gzip header carries no name, zero mtime, and OS byte 255 +// +// Symlinks and irregular files are rejected (a bundle must be fully +// self-contained and portable); __pycache__ directories and *.pyc files +// are skipped, mirroring embed.WriteSkillSubset. The gzipped result is +// rejected when it exceeds MaxBundleBytes. +func Pack(root fs.FS) ([]byte, string, error) { + entries, err := collectEntries(root) + if err != nil { + return nil, "", err + } + + if !hasTopLevelManifest(entries) { + return nil, "", fmt.Errorf("skillpkg: bundle root must contain %s — a skill bundle without %s is not a skill", ManifestName, ManifestName) + } + + var buf bytes.Buffer + zw, err := gzip.NewWriterLevel(&buf, gzip.BestCompression) + if err != nil { + return nil, "", fmt.Errorf("skillpkg: gzip writer: %w", err) + } + // Deterministic gzip header: no original name, zero mtime (written + // as 0), and an explicit "unknown" OS byte so the output does not + // vary across platforms or Go releases. 
+ zw.Header.Name = "" + zw.Header.ModTime = time.Time{} + zw.Header.OS = 255 + + tw := tar.NewWriter(zw) + for _, e := range entries { + if e.dir { + if err := tw.WriteHeader(dirHeader(e.path)); err != nil { + return nil, "", fmt.Errorf("skillpkg: write dir header %s: %w", e.path, err) + } + continue + } + data, err := fs.ReadFile(root, e.path) + if err != nil { + return nil, "", fmt.Errorf("skillpkg: read %s: %w", e.path, err) + } + if err := tw.WriteHeader(fileHeader(e.path, int64(len(data)), e.exec)); err != nil { + return nil, "", fmt.Errorf("skillpkg: write file header %s: %w", e.path, err) + } + if _, err := tw.Write(data); err != nil { + return nil, "", fmt.Errorf("skillpkg: write %s: %w", e.path, err) + } + } + if err := tw.Close(); err != nil { + return nil, "", fmt.Errorf("skillpkg: close tar: %w", err) + } + if err := zw.Close(); err != nil { + return nil, "", fmt.Errorf("skillpkg: close gzip: %w", err) + } + + gz := buf.Bytes() + if len(gz) > MaxBundleBytes { + return nil, "", fmt.Errorf("skillpkg: gzipped bundle is %d bytes, which exceeds the %d-byte skill bundle cap (the artifact must fit in a ConfigMap) — trim large assets from the skill directory", len(gz), MaxBundleBytes) + } + + sum := sha256.Sum256(gz) + return gz, hex.EncodeToString(sum[:]), nil +} + +// ScanSecrets walks root with the same entry rules as Pack and returns +// one human-readable warning per entry that looks like it carries +// secret material: .env-style files, id_rsa* key files, and any file +// whose content carries a PEM "PRIVATE KEY" marker. Warn-only by +// contract — callers print the warnings and proceed; a skill author may +// legitimately ship an .env.example. 
+func ScanSecrets(root fs.FS) ([]string, error) { + entries, err := collectEntries(root) + if err != nil { + return nil, err + } + + var warnings []string + for _, e := range entries { + if e.dir { + continue + } + base := path.Base(e.path) + switch { + case base == ".env" || strings.HasPrefix(base, ".env."): + warnings = append(warnings, fmt.Sprintf("%s: looks like an environment file — it will be published to every buyer", e.path)) + case strings.HasPrefix(base, "id_rsa"): + warnings = append(warnings, fmt.Sprintf("%s: looks like an SSH key file — it will be published to every buyer", e.path)) + } + data, err := fs.ReadFile(root, e.path) + if err != nil { + return nil, fmt.Errorf("skillpkg: read %s: %w", e.path, err) + } + if bytes.Contains(data, []byte("PRIVATE KEY")) { + warnings = append(warnings, fmt.Sprintf("%s: contains a PEM \"PRIVATE KEY\" marker — it will be published to every buyer", e.path)) + } + } + return warnings, nil +} + +// collectEntries walks root and returns the full, lexicographically +// sorted entry list. Symlinks and other irregular files error out; +// __pycache__ dirs and *.pyc files are skipped (they are interpreter +// artifacts that vary per machine and would break hash determinism). +func collectEntries(root fs.FS) ([]entry, error) { + var entries []entry + err := fs.WalkDir(root, ".", func(p string, d fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if p == "." 
{ + return nil + } + if d.IsDir() { + if d.Name() == "__pycache__" { + return fs.SkipDir + } + entries = append(entries, entry{path: p, dir: true}) + return nil + } + if !d.Type().IsRegular() { + return fmt.Errorf("skillpkg: unsupported entry %q (%s): symlinks and special files cannot be packed into a skill bundle", p, d.Type()) + } + if strings.HasSuffix(d.Name(), ".pyc") { + return nil + } + info, err := d.Info() + if err != nil { + return fmt.Errorf("skillpkg: stat %s: %w", p, err) + } + entries = append(entries, entry{path: p, exec: info.Mode().Perm()&0o111 != 0}) + return nil + }) + if err != nil { + return nil, err + } + + // One sorted order for everything. Parents naturally precede their + // children ("a" < "a/b"), so extraction order is always valid. + sort.Slice(entries, func(i, j int) bool { return entries[i].path < entries[j].path }) + return entries, nil +} + +func hasTopLevelManifest(entries []entry) bool { + for _, e := range entries { + if !e.dir && e.path == ManifestName { + return true + } + } + return false +} + +// dirHeader builds the normalized tar header for a directory entry. +func dirHeader(p string) *tar.Header { + hdr := baseHeader(p + "/") + hdr.Typeflag = tar.TypeDir + hdr.Mode = 0o755 + return hdr +} + +// fileHeader builds the normalized tar header for a regular file. +func fileHeader(p string, size int64, exec bool) *tar.Header { + hdr := baseHeader(p) + hdr.Typeflag = tar.TypeReg + hdr.Size = size + hdr.Mode = 0o644 + if exec { + hdr.Mode = 0o755 + } + return hdr +} + +// baseHeader carries every normalized field shared by files and dirs: +// USTAR format, epoch mtime, zero atime/ctime, uid/gid 0, cleared +// uname/gname, forward-slash relative name. 
+func baseHeader(name string) *tar.Header { + return &tar.Header{ + Name: name, + Format: tar.FormatUSTAR, + ModTime: time.Unix(0, 0), + AccessTime: time.Time{}, + ChangeTime: time.Time{}, + Uid: 0, + Gid: 0, + Uname: "", + Gname: "", + } +} diff --git a/internal/skillpkg/bundle_test.go b/internal/skillpkg/bundle_test.go new file mode 100644 index 00000000..b1fcf386 --- /dev/null +++ b/internal/skillpkg/bundle_test.go @@ -0,0 +1,396 @@ +package skillpkg + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "io" + "math/rand" + "os" + "path/filepath" + "strings" + "testing" + "testing/fstest" + "time" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" +) + +// skillFS builds a minimal valid skill tree as a MapFS. mtime/sys +// fields are parameterized so tests can prove they don't leak into the +// hash. +func skillFS(modTime time.Time) fstest.MapFS { + return fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("# my-skill\n"), Mode: 0o644, ModTime: modTime}, + "scripts/run.py": &fstest.MapFile{Data: []byte("print('hi')\n"), Mode: 0o755, ModTime: modTime}, + "references/ref.md": &fstest.MapFile{Data: []byte("ref\n"), Mode: 0o600, ModTime: modTime}, + } +} + +func TestMaxBundleBytes_MatchesMonetizeAPI(t *testing.T) { + if MaxBundleBytes != monetizeapi.MaxSkillBundleBytes { + t.Fatalf("skillpkg.MaxBundleBytes = %d, monetizeapi.MaxSkillBundleBytes = %d — these caps must agree", + MaxBundleBytes, monetizeapi.MaxSkillBundleBytes) + } +} + +func TestPack_Deterministic(t *testing.T) { + fsys := skillFS(time.Unix(1700000000, 0)) + + gz1, hash1, err := Pack(fsys) + if err != nil { + t.Fatalf("first pack: %v", err) + } + gz2, hash2, err := Pack(fsys) + if err != nil { + t.Fatalf("second pack: %v", err) + } + + if !bytes.Equal(gz1, gz2) { + t.Error("two packs of the same FS produced different bytes") + } + if hash1 != hash2 { + t.Errorf("two packs of the same FS produced different hashes: %s vs %s", hash1, hash2) + } + if len(hash1) != 64 || 
strings.ToLower(hash1) != hash1 { + t.Errorf("hash %q is not 64-char lowercase hex", hash1) + } +} + +// TestPack_MetadataIndependence proves on-disk metadata (mtimes, sys +// info, source modes that normalize to the same class) does not change +// the artifact hash. +func TestPack_MetadataIndependence(t *testing.T) { + tests := []struct { + name string + a, b fstest.MapFS + }{ + { + name: "different mtimes", + a: skillFS(time.Unix(0, 0)), + b: skillFS(time.Now()), + }, + { + name: "different owner-ish sys info", + a: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644, Sys: &struct{ UID int }{1000}}, + }, + b: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644, Sys: &struct{ UID int }{0}}, + }, + }, + { + name: "modes within the same normalization class", + a: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644}, + "run.sh": &fstest.MapFile{Data: []byte("y"), Mode: 0o755}, + }, + b: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o600}, + "run.sh": &fstest.MapFile{Data: []byte("y"), Mode: 0o700}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gzA, hashA, err := Pack(tt.a) + if err != nil { + t.Fatalf("pack a: %v", err) + } + gzB, hashB, err := Pack(tt.b) + if err != nil { + t.Fatalf("pack b: %v", err) + } + if hashA != hashB { + t.Errorf("hashes differ: %s vs %s", hashA, hashB) + } + if !bytes.Equal(gzA, gzB) { + t.Error("bytes differ for metadata-only variation") + } + }) + } +} + +// TestPack_CreationOrderIndependence writes the same content into two +// real directories in opposite creation order (and with different +// mtimes) and proves the hashes match. This is the on-disk analog of +// the MapFS determinism tests. 
+func TestPack_CreationOrderIndependence(t *testing.T) { + files := map[string]string{ + "SKILL.md": "# skill\n", + "scripts/a.py": "a\n", + "scripts/b.py": "b\n", + "references.txt": "r\n", + } + + writeAll := func(t *testing.T, order []string) string { + t.Helper() + dir := t.TempDir() + for _, rel := range order { + p := filepath.Join(dir, filepath.FromSlash(rel)) + if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(p, []byte(files[rel]), 0o644); err != nil { + t.Fatal(err) + } + // Scatter mtimes so this also covers epoch normalization on + // a real filesystem. + mt := time.Now().Add(-time.Duration(len(rel)) * time.Hour) + if err := os.Chtimes(p, mt, mt); err != nil { + t.Fatal(err) + } + } + return dir + } + + dirA := writeAll(t, []string{"SKILL.md", "scripts/a.py", "scripts/b.py", "references.txt"}) + dirB := writeAll(t, []string{"references.txt", "scripts/b.py", "scripts/a.py", "SKILL.md"}) + + _, hashA, err := Pack(os.DirFS(dirA)) + if err != nil { + t.Fatalf("pack a: %v", err) + } + _, hashB, err := Pack(os.DirFS(dirB)) + if err != nil { + t.Fatalf("pack b: %v", err) + } + if hashA != hashB { + t.Errorf("creation order changed the hash: %s vs %s", hashA, hashB) + } +} + +func TestPack_Errors(t *testing.T) { + tests := []struct { + name string + fsys fstest.MapFS + wantSub string + }{ + { + name: "symlink rejected", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644}, + "link": &fstest.MapFile{Mode: 0o644 | os.ModeSymlink}, + }, + wantSub: "symlinks and special files", + }, + { + name: "missing SKILL.md", + fsys: fstest.MapFS{ + "scripts/run.py": &fstest.MapFile{Data: []byte("x"), Mode: 0o644}, + }, + wantSub: "must contain SKILL.md", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, _, err := Pack(tt.fsys) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tt.wantSub) { + t.Errorf("error %q does 
not contain %q", err, tt.wantSub) + } + }) + } +} + +func TestPack_RejectsOversizeAfterGzip(t *testing.T) { + // Incompressible (random) payload comfortably above the cap so the + // post-gzip size still exceeds MaxBundleBytes. + big := make([]byte, MaxBundleBytes+200000) + rnd := rand.New(rand.NewSource(42)) //nolint:gosec // determinism wanted, not security + rnd.Read(big) + + fsys := fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644}, + "blob.bin": &fstest.MapFile{Data: big, Mode: 0o644}, + } + + _, _, err := Pack(fsys) + if err == nil { + t.Fatal("expected oversize error, got nil") + } + if !strings.Contains(err.Error(), "900000-byte") { + t.Errorf("oversize error should name the cap, got: %v", err) + } +} + +func TestPack_SkipsPythonArtifacts(t *testing.T) { + fsys := fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("x"), Mode: 0o644}, + "scripts/run.py": &fstest.MapFile{Data: []byte("y"), Mode: 0o644}, + "scripts/run.pyc": &fstest.MapFile{Data: []byte("z"), Mode: 0o644}, + "scripts/__pycache__/run.cpython-312.pyc": &fstest.MapFile{Data: []byte("z"), Mode: 0o644}, + } + + gz, _, err := Pack(fsys) + if err != nil { + t.Fatal(err) + } + + names := tarEntryNames(t, gz) + for _, n := range names { + if strings.Contains(n, "pyc") || strings.Contains(n, "__pycache__") { + t.Errorf("python artifact leaked into bundle: %s", n) + } + } +} + +// TestPack_NormalizesHeaders cracks the artifact open and verifies the +// determinism-relevant tar header fields entry by entry. 
+func TestPack_NormalizesHeaders(t *testing.T) { + fsys := fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o600, ModTime: time.Now()}, + "scripts/run.sh": &fstest.MapFile{Data: []byte("#!/bin/sh\n"), Mode: 0o700, ModTime: time.Now()}, + } + + gz, _, err := Pack(fsys) + if err != nil { + t.Fatal(err) + } + + zr, err := gzip.NewReader(bytes.NewReader(gz)) + if err != nil { + t.Fatal(err) + } + if zr.Header.Name != "" { + t.Errorf("gzip header name = %q, want empty", zr.Header.Name) + } + if zr.Header.OS != 255 { + t.Errorf("gzip header OS = %d, want 255", zr.Header.OS) + } + + wantModes := map[string]int64{ + "SKILL.md": 0o644, + "scripts/": 0o755, + "scripts/run.sh": 0o755, // exec bit on source promotes to 0755 + } + tr := tar.NewReader(zr) + var got []string + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + got = append(got, hdr.Name) + if want, ok := wantModes[hdr.Name]; ok && hdr.Mode != want { + t.Errorf("%s mode = %o, want %o", hdr.Name, hdr.Mode, want) + } + if !hdr.ModTime.Equal(time.Unix(0, 0)) { + t.Errorf("%s mtime = %v, want epoch", hdr.Name, hdr.ModTime) + } + if hdr.Uid != 0 || hdr.Gid != 0 || hdr.Uname != "" || hdr.Gname != "" { + t.Errorf("%s ownership not cleared: uid=%d gid=%d uname=%q gname=%q", hdr.Name, hdr.Uid, hdr.Gid, hdr.Uname, hdr.Gname) + } + } + + want := []string{"SKILL.md", "scripts/", "scripts/run.sh"} + if strings.Join(got, ",") != strings.Join(want, ",") { + t.Errorf("entry order = %v, want %v", got, want) + } +} + +func TestScanSecrets(t *testing.T) { + tests := []struct { + name string + fsys fstest.MapFS + wantCount int + wantSub string + }{ + { + name: "clean skill", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o644}, + "scripts/run.py": &fstest.MapFile{Data: []byte("print(1)"), Mode: 0o644}, + }, + wantCount: 0, + }, + { + name: "dotenv file", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), 
Mode: 0o644}, + ".env": &fstest.MapFile{Data: []byte("API_KEY=x"), Mode: 0o644}, + }, + wantCount: 1, + wantSub: "environment file", + }, + { + name: "dotenv variant", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o644}, + ".env.locals": &fstest.MapFile{Data: []byte("API_KEY=x"), Mode: 0o644}, + }, + wantCount: 1, + wantSub: "environment file", + }, + { + name: "ssh key name", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o644}, + "keys/id_rsa": &fstest.MapFile{Data: []byte("whatever"), Mode: 0o600}, + }, + wantCount: 1, + wantSub: "SSH key", + }, + { + name: "pem marker in content", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o644}, + "creds.txt": &fstest.MapFile{Data: []byte("-----BEGIN EC PRIVATE KEY-----\nabc\n"), Mode: 0o644}, + }, + wantCount: 1, + wantSub: "PRIVATE KEY", + }, + { + name: "key file with pem content warns for both", + fsys: fstest.MapFS{ + "SKILL.md": &fstest.MapFile{Data: []byte("doc"), Mode: 0o644}, + "id_rsa": &fstest.MapFile{Data: []byte("-----BEGIN OPENSSH PRIVATE KEY-----\n"), Mode: 0o600}, + }, + wantCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + warnings, err := ScanSecrets(tt.fsys) + if err != nil { + t.Fatal(err) + } + if len(warnings) != tt.wantCount { + t.Fatalf("got %d warnings %v, want %d", len(warnings), warnings, tt.wantCount) + } + if tt.wantSub != "" && !strings.Contains(strings.Join(warnings, "\n"), tt.wantSub) { + t.Errorf("warnings %v do not mention %q", warnings, tt.wantSub) + } + }) + } +} + +// tarEntryNames decompresses and lists tar entry names. 
+func tarEntryNames(t *testing.T, gz []byte) []string { + t.Helper() + zr, err := gzip.NewReader(bytes.NewReader(gz)) + if err != nil { + t.Fatal(err) + } + tr := tar.NewReader(zr) + var names []string + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + names = append(names, hdr.Name) + } + return names +} diff --git a/internal/stack/backend_k3s.go b/internal/stack/backend_k3s.go index c8e56be5..bea1bbdc 100644 --- a/internal/stack/backend_k3s.go +++ b/internal/stack/backend_k3s.go @@ -63,6 +63,9 @@ func (b *K3sBackend) Init(cfg *config.Config, u *ui.UI, stackID string) error { k3sConfig := embed.K3sConfig k3sConfig = strings.ReplaceAll(k3sConfig, "{{STACK_ID}}", stackID) k3sConfig = strings.ReplaceAll(k3sConfig, "{{DATA_DIR}}", absDataDir) + // LAN SANs so worker nodes can join this server by IP or hostname. + k3sConfig = strings.ReplaceAll(k3sConfig, "{{NODE_IP}}", OutboundIP()) + k3sConfig = strings.ReplaceAll(k3sConfig, "{{NODE_HOSTNAME}}", nodeHostname()) k3sConfigPath := filepath.Join(cfg.ConfigDir, k3sConfigFile) if err := os.WriteFile(k3sConfigPath, []byte(k3sConfig), 0o600); err != nil { diff --git a/internal/stack/backend_k3s_init_test.go b/internal/stack/backend_k3s_init_test.go new file mode 100644 index 00000000..037426b3 --- /dev/null +++ b/internal/stack/backend_k3s_init_test.go @@ -0,0 +1,48 @@ +package stack + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/ui" +) + +// TestK3sBackend_Init_SubstitutesNodeSANs verifies that Init renders the +// embedded k3s-config.yaml with every {{...}} placeholder resolved and the +// node's LAN IP + hostname injected into the tls-san block, so a worker node +// can join the server by either address. 
+func TestK3sBackend_Init_SubstitutesNodeSANs(t *testing.T) { + dir := t.TempDir() + cfg := &config.Config{ + ConfigDir: dir, + DataDir: filepath.Join(dir, "data"), + } + + b := &K3sBackend{} + if err := b.Init(cfg, ui.New(false), "teststack"); err != nil { + t.Fatalf("Init: %v", err) + } + + data, err := os.ReadFile(filepath.Join(dir, k3sConfigFile)) + if err != nil { + t.Fatalf("read rendered k3s config: %v", err) + } + + rendered := string(data) + if strings.Contains(rendered, "{{") { + t.Errorf("rendered k3s config still has an unsubstituted placeholder:\n%s", rendered) + } + + if !strings.Contains(rendered, "tls-san") { + t.Fatal("rendered k3s config has no tls-san block") + } + + for _, want := range []string{OutboundIP(), nodeHostname()} { + if !strings.Contains(rendered, want) { + t.Errorf("tls-san missing %q\n%s", want, rendered) + } + } +} diff --git a/internal/stack/node.go b/internal/stack/node.go new file mode 100644 index 00000000..46b393e8 --- /dev/null +++ b/internal/stack/node.go @@ -0,0 +1,128 @@ +package stack + +import ( + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +// k3sAPIPort is the standard k3s supervisor/apiserver port a joining agent dials. +const k3sAPIPort = 6443 + +// OutboundIP returns this host's primary outbound IPv4 address, discovered by +// opening a UDP socket toward a public address (no packets are actually sent). +// It is the address a LAN peer would use to reach this host and the one k3s +// advertises as the node InternalIP. Falls back to 127.0.0.1. +func OutboundIP() string { + conn, err := net.Dial("udp", "8.8.8.8:80") + if err != nil { + return "127.0.0.1" + } + defer conn.Close() + + if addr, ok := conn.LocalAddr().(*net.UDPAddr); ok && addr.IP != nil { + return addr.IP.String() + } + + return "127.0.0.1" +} + +// nodeHostname returns this host's hostname, or "localhost" if unavailable. 
+func nodeHostname() string { + h, err := os.Hostname() + if err != nil || strings.TrimSpace(h) == "" { + return "localhost" + } + + return h +} + +// K3sNodeTokenPath returns the path to the k3s server join token for the k3s +// backend's data-dir. It mirrors `data-dir: {{DATA_DIR}}/k3s` in the embedded +// k3s-config.yaml — NOT the default /var/lib/rancher/k3s, which obol overrides. +func K3sNodeTokenPath(cfg *config.Config) string { + absDataDir, err := filepath.Abs(cfg.DataDir) + if err != nil { + absDataDir = cfg.DataDir + } + + return filepath.Join(absDataDir, "k3s", "server", "node-token") +} + +// ReadK3sNodeToken reads the root-owned k3s server join token via sudo. +func ReadK3sNodeToken(cfg *config.Config) (string, error) { + path := K3sNodeTokenPath(cfg) + + out, err := exec.Command("sudo", "cat", path).Output() + if err != nil { + return "", fmt.Errorf("read k3s node-token at %s (is this host the running k3s server?): %w", path, err) + } + + token := strings.TrimSpace(string(out)) + if token == "" { + return "", fmt.Errorf("k3s node-token at %s is empty", path) + } + + return token, nil +} + +// K3sServerURL returns the https URL a joining agent dials. When override is +// empty it uses this host's primary LAN IP and the standard k3s API port. +func K3sServerURL(override string) string { + if override != "" { + return override + } + + return fmt.Sprintf("https://%s:%d", OutboundIP(), k3sAPIPort) +} + +// K3sBinaryVersion returns the k3s release string (e.g. "v1.35.5+k3s1") of the +// k3s binary in BinDir, used to pin a joining agent to the server's version. +// Returns "" when it can't be determined (the installer then picks stable). 
+func K3sBinaryVersion(cfg *config.Config) string { + out, err := exec.Command(filepath.Join(cfg.BinDir, "k3s"), "--version").Output() + if err != nil { + return "" + } + + return parseK3sVersion(string(out)) +} + +// parseK3sVersion extracts the version token from `k3s --version` output, +// whose first line looks like: "k3s version v1.35.5+k3s1 (6a4781ad)". +func parseK3sVersion(out string) string { + firstLine, _, _ := strings.Cut(out, "\n") + + fields := strings.Fields(firstLine) + for i, f := range fields { + if f == "version" && i+1 < len(fields) { + return fields[i+1] + } + } + + return "" +} + +// K3sAgentJoinCommand builds the copy-pasteable one-liner an operator runs on a +// Linux worker node to join this stack's k3s cluster. When version is non-empty +// the agent install is pinned to it (agents should match the server version). +func K3sAgentJoinCommand(serverURL, token, version string) string { + var b strings.Builder + + b.WriteString("curl -sfL https://get.k3s.io | ") + + if version != "" { + b.WriteString("INSTALL_K3S_VERSION=" + version + " ") + } + + b.WriteString("K3S_URL=" + serverURL + " ") + b.WriteString("K3S_TOKEN='" + token + "' ") + b.WriteString("sh -s - agent") + + return b.String() +} diff --git a/internal/stack/node_test.go b/internal/stack/node_test.go new file mode 100644 index 00000000..cfe27486 --- /dev/null +++ b/internal/stack/node_test.go @@ -0,0 +1,111 @@ +package stack + +import ( + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +func TestK3sAgentJoinCommand(t *testing.T) { + const ( + token = "K10abc123::server:def456" + server = "https://192.168.50.203:6443" + ) + + tests := []struct { + name string + version string + want []string + absent []string + }{ + { + name: "pinned version", + version: "v1.35.5+k3s1", + want: []string{ + "curl -sfL https://get.k3s.io | ", + "INSTALL_K3S_VERSION=v1.35.5+k3s1 ", + "K3S_URL=https://192.168.50.203:6443 ", + "K3S_TOKEN='K10abc123::server:def456' ", + 
"sh -s - agent", + }, + }, + { + name: "unpinned version omits INSTALL_K3S_VERSION", + version: "", + want: []string{ + "K3S_URL=https://192.168.50.203:6443 ", + "sh -s - agent", + }, + absent: []string{"INSTALL_K3S_VERSION"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := K3sAgentJoinCommand(server, token, tt.version) + for _, w := range tt.want { + if !strings.Contains(got, w) { + t.Errorf("join command missing %q\n got: %s", w, got) + } + } + + for _, a := range tt.absent { + if strings.Contains(got, a) { + t.Errorf("join command should not contain %q\n got: %s", a, got) + } + } + }) + } +} + +func TestParseK3sVersion(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"standard two-line output", "k3s version v1.35.5+k3s1 (6a4781ad)\ngo version go1.25.9\n", "v1.35.5+k3s1"}, + {"single line no trailing newline", "k3s version v1.30.0+k3s1", "v1.30.0+k3s1"}, + {"empty", "", ""}, + {"unexpected format", "something else entirely", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseK3sVersion(tt.in); got != tt.want { + t.Errorf("parseK3sVersion(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestK3sServerURL(t *testing.T) { + if got := K3sServerURL("https://example.local:6443"); got != "https://example.local:6443" { + t.Errorf("override should be returned verbatim, got %q", got) + } + + got := K3sServerURL("") + if !strings.HasPrefix(got, "https://") || !strings.HasSuffix(got, ":6443") { + t.Errorf("default server URL malformed: %q", got) + } +} + +func TestK3sNodeTokenPath(t *testing.T) { + cfg := &config.Config{DataDir: "/tmp/obol-data"} + + got := K3sNodeTokenPath(cfg) + if !strings.HasSuffix(got, "/k3s/server/node-token") { + t.Errorf("token path = %q, want suffix /k3s/server/node-token", got) + } + + if !strings.HasPrefix(got, "/tmp/obol-data") { + t.Errorf("token path should be under the data-dir, got %q", got) + } +} + +func 
TestOutboundIP_NeverEmpty(t *testing.T) { + if got := OutboundIP(); got == "" { + t.Error("OutboundIP must never return empty (falls back to 127.0.0.1)") + } +} diff --git a/internal/stack/safety_test.go b/internal/stack/safety_test.go index e0fa3eb3..e3b32159 100644 --- a/internal/stack/safety_test.go +++ b/internal/stack/safety_test.go @@ -119,8 +119,8 @@ func TestErrSafetyAborted_IsExported(t *testing.T) { func TestRawOffer_GateReadyRequiresBothConditions(t *testing.T) { cases := []struct { - name string - conds [][2]string // (type, status) + name string + conds [][2]string // (type, status) wantGate bool }{ {"both true", [][2]string{{"PaymentGateReady", "True"}, {"RoutePublished", "True"}}, true}, diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 29a77692..949f901e 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -959,6 +959,7 @@ var baseLocalImages = []localImage{ {tag: "ghcr.io/obolnetwork/x402-verifier:latest", dockerfile: "Dockerfile.x402-verifier"}, {tag: "ghcr.io/obolnetwork/serviceoffer-controller:latest", dockerfile: "Dockerfile.serviceoffer-controller"}, {tag: "ghcr.io/obolnetwork/x402-buyer:latest", dockerfile: "Dockerfile.x402-buyer"}, + {tag: "ghcr.io/obolnetwork/x402-escrow:latest", dockerfile: "Dockerfile.x402-escrow"}, {tag: "ghcr.io/obolnetwork/demo-server:latest", dockerfile: "Dockerfile.demo-server"}, {tag: "ghcr.io/obolnetwork/obol-stack-public-storefront:latest", dockerfile: "Dockerfile.public-storefront"}, } diff --git a/internal/testutil/anvil.go b/internal/testutil/anvil.go index 75e07e3f..4486b328 100644 --- a/internal/testutil/anvil.go +++ b/internal/testutil/anvil.go @@ -227,6 +227,37 @@ func (f *AnvilFork) FundETH(t *testing.T, addr string, amount *big.Int) { t.Logf("funded %s with %s wei", addr, amount) } +// ApprovePermit2ViaImpersonation performs the one-time approve(Permit2, max) +// from owner on token via anvil_impersonateAccount — the fork-test stand-in +// for the on-chain approval a 
real wallet owner does once per token. Without +// it buy.py's Permit2 allowance preflight (correctly) refuses to pre-sign. +func (f *AnvilFork) ApprovePermit2ViaImpersonation(t *testing.T, token, owner string) { + t.Helper() + + const permit2 = "0x000000000022D473030F116dDEE9F6B43aC78BA3" + // approve(address,uint256) selector + permit2 + max uint256. + data := "0x095ea7b3" + + "000000000000000000000000" + strings.ToLower(strings.TrimPrefix(permit2, "0x")) + + strings.Repeat("f", 64) + + for _, call := range []string{ + fmt.Sprintf(`{"jsonrpc":"2.0","method":"anvil_impersonateAccount","params":["%s"],"id":1}`, owner), + fmt.Sprintf(`{"jsonrpc":"2.0","method":"eth_sendTransaction","params":[{"from":"%s","to":"%s","data":"%s"}],"id":1}`, owner, token, data), + fmt.Sprintf(`{"jsonrpc":"2.0","method":"anvil_stopImpersonatingAccount","params":["%s"],"id":1}`, owner), + } { + resp, err := http.Post(f.RPCURL, "application/json", strings.NewReader(call)) + if err != nil { + t.Fatalf("approve Permit2 via impersonation: %v", err) + } + raw, _ := io.ReadAll(resp.Body) + resp.Body.Close() + if strings.Contains(string(raw), `"error"`) { + t.Fatalf("approve Permit2 via impersonation: %s", raw) + } + } + t.Logf("approved Permit2 for %s on token %s (impersonated)", owner, token) +} + // ClearCode removes contract code from an address on Anvil. // Required for deterministic Anvil accounts that have proxy contracts on Base Sepolia — // USDC's SignatureChecker sees code → tries EIP-1271 instead of ecrecover. 
diff --git a/internal/testutil/facilitator_real.go b/internal/testutil/facilitator_real.go index eda545f7..12a0230a 100644 --- a/internal/testutil/facilitator_real.go +++ b/internal/testutil/facilitator_real.go @@ -9,12 +9,24 @@ import ( "net/http" "os" "os/exec" + "runtime" "strconv" "testing" "time" ) -const x402FacilitatorImage = "ghcr.io/obolnetwork/x402-facilitator-prometheus-overlay:1.4.9" +const defaultX402FacilitatorImage = "ghcr.io/obolnetwork/x402-facilitator-prometheus-overlay:1.4.9" + +// x402FacilitatorImage resolves the facilitator image, honoring the same +// X402_FACILITATOR_IMAGE override the shell flows use (flows/lib.sh) so a +// locally-built facilitator (e.g. an upstream-sync candidate) can be smoked +// through the Go integration path without editing the pin. +func x402FacilitatorImage() string { + if img := os.Getenv("X402_FACILITATOR_IMAGE"); img != "" { + return img + } + return defaultX402FacilitatorImage +} // RealFacilitator wraps a running x402-rs facilitator process. // Unlike MockFacilitator, this validates real EIP-712 signatures against @@ -54,9 +66,16 @@ func StartRealFacilitatorWithOptions(t *testing.T, anvil *AnvilFork, opts RealFa port := l.Addr().(*net.TCPAddr).Port l.Close() - // The facilitator runs on the host, so it needs the localhost Anvil URL - // (not host.docker.internal which only resolves inside Docker/k3d). - anvilLocalURL := fmt.Sprintf("http://127.0.0.1:%d", anvil.Port) + // The facilitator runs in a Docker container. On Linux it gets + // `--network host`, so the host loopback works for the Anvil URL. On + // macOS, Docker Desktop's host networking does not share the Mac + // loopback — mirror flows/lib.sh::start_x402_facilitator_container: + // publish the port with -p and reach Anvil via host.docker.internal. + anvilHost := "127.0.0.1" + if runtime.GOOS == "darwin" { + anvilHost = "host.docker.internal" + } + anvilLocalURL := fmt.Sprintf("http://%s:%d", anvilHost, anvil.Port) // Generate config file. 
configPath := writeRealFacilitatorConfig(t, port, anvilLocalURL, anvil.Accounts[0].PrivateKey, opts) @@ -64,14 +83,23 @@ func StartRealFacilitatorWithOptions(t *testing.T, anvil *AnvilFork, opts RealFa ctx, cancel := context.WithCancel(context.Background()) containerName := fmt.Sprintf("obol-test-x402-facilitator-%d", time.Now().UnixNano()) - cmd := exec.CommandContext(ctx, - "docker", "run", "--rm", + // Linux: host networking, the facilitator binds the host port directly. + // macOS: publish the port instead — Docker Desktop's host networking + // does not share the Mac loopback (same split as flows/lib.sh). + netArgs := []string{"--network", "host"} + if runtime.GOOS == "darwin" { + netArgs = []string{"-p", fmt.Sprintf("%d:%d", port, port)} + } + args := append([]string{ + "run", "--rm", "--name", containerName, - "--network", "host", + }, netArgs...) + args = append(args, "-v", configPath+":/config.json:ro", - x402FacilitatorImage, + x402FacilitatorImage(), "--config", "/config.json", ) + cmd := exec.CommandContext(ctx, "docker", args...) var stderr bytes.Buffer @@ -110,21 +138,29 @@ func StartRealFacilitatorWithOptions(t *testing.T, anvil *AnvilFork, opts RealFa return rf } -// requireFacilitatorImage verifies the pinned facilitator image is available. +// requireFacilitatorImage verifies the facilitator image is available. // Local facilitator experiments should be packaged as a Docker image instead of -// depending on host checkout paths. +// depending on host checkout paths: build + tag the image, then point +// X402_FACILITATOR_IMAGE at it. An image already present locally is used as-is +// (never pulled), mirroring flows/lib.sh::docker_pull_public_image. 
func requireFacilitatorImage(t *testing.T) { t.Helper() + image := x402FacilitatorImage() if _, err := exec.LookPath("docker"); err != nil { - t.Fatalf("docker not installed; cannot run %s", x402FacilitatorImage) + t.Fatalf("docker not installed; cannot run %s", image) + } + + if err := exec.Command("docker", "image", "inspect", image).Run(); err == nil { + t.Logf("using local x402 facilitator image %s", image) + return } - pull := exec.Command("docker", "pull", x402FacilitatorImage) + pull := exec.Command("docker", "pull", image) if out, err := pull.CombinedOutput(); err != nil { - t.Fatalf("pull %s: %v\n%s", x402FacilitatorImage, err, out) + t.Fatalf("pull %s: %v\n%s", image, err, out) } - t.Logf("using x402 facilitator image %s", x402FacilitatorImage) + t.Logf("using x402 facilitator image %s", image) } // writeRealFacilitatorConfig writes a temporary config-test.json for the facilitator. diff --git a/internal/x402/card.go b/internal/x402/card.go new file mode 100644 index 00000000..7f67aed2 --- /dev/null +++ b/internal/x402/card.go @@ -0,0 +1,486 @@ +package x402 + +// MPP credit-card (Stripe stripe.charge) settlement for the seller gateway. +// +// Plugs the MPP credit-card method into the existing x402 verifier without +// disturbing the crypto path: +// +// - buildCardRequirement(): emits the card option as a 402 accepts[] entry, +// mirroring the MPP stripe.charge challenge.request (amount in currency +// minor units + currency/decimals + methodDetails{networkId, +// paymentMethodTypes}). +// - cardGateway / stripeCardGateway: a two-phase authorize -> capture/cancel +// against Stripe PaymentIntents (manual capture). The buyer's pre-authorized +// Shared Payment Token is AUTHORIZED before the upstream is served and only +// CAPTURED after a successful (<400) upstream response; a failed upstream +// CANCELS the authorization so the buyer is never charged for nothing. 
+// - serveCardGated(): the in-process HandleProxy branch — authorize-before- +// serve, capture-after-success, cancel-on-failure, with an in-memory SPT +// replay guard so a Shared Payment Token cannot be reused. +// +// Productionization notes (see README "Credit-card payments (MPP)"): +// - The Stripe secret is read from STRIPE_SECRET_KEY; the verifier Deployment +// sources it from the x402-secrets Secret. A per-offer/per-namespace Secret +// needs the verifier's resourceName-scoped secret RBAC to be widened +// deliberately and is intentionally deferred. +// - The replay guard is per-pod; the verifier runs single-replica, so this is +// sufficient today. A multi-replica verifier would need shared replay state. +// - The SPT is passed as the top-level form field shared_payment_granted_token +// per the cp0x-org/mppx reference; validate against a live Stripe "machine +// payments" account before relying on it in production. + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "log" + "net/http" + "net/url" + "os" + "strings" + "sync" + "time" + + x402types "github.com/x402-foundation/x402/go/types" +) + +const ( + // cardScheme is the PaymentRequirements.Scheme used for the card option so + // a card-capable buyer can distinguish it from the x402 "exact" crypto + // option co-offered on the same route. + cardScheme = "card" + // cardNetworkStripe identifies the Stripe rail in the card requirement. + cardNetworkStripe = "stripe" + // defaultCardCurrency is the fallback ISO-4217 currency for card offers. + defaultCardCurrency = "usd" + + // stripeAPIBase is the default Stripe API base URL (overridable on the + // gateway for tests). + stripeAPIBase = "https://api.stripe.com/v1" + + // cardStripeTimeout bounds each Stripe API call. Authorize/capture/cancel + // run on detached contexts so a client disconnect cannot cancel an + // in-flight money operation. 
+ cardStripeTimeout = 20 * time.Second + + // sptReplayTTL is how long a seen Shared Payment Token stays blocked in the + // per-pod replay guard. SPTs are single-use and short-lived, so an hour is + // ample headroom over their validity window. + sptReplayTTL = time.Hour +) + +// IsCard reports whether this route is gated by the MPP credit-card method +// rather than x402 on-chain settlement. +func (r *RouteRule) IsCard() bool { return r != nil && r.Card != nil } + +// currencyMinorUnits returns the ISO-4217 minor-unit exponent (decimal places) +// for a currency, defaulting to 2. Stripe expects PaymentIntent amounts in the +// currency's smallest unit, which is not always cents (JPY has 0, BHD has 3). +func currencyMinorUnits(currency string) int { + switch strings.ToLower(strings.TrimSpace(currency)) { + case "jpy", "krw", "vnd", "clp", "isk", "bif", "djf", "gnf", "kmf", "pyg", "rwf", "ugx", "vuv", "xaf", "xof", "xpf": + return 0 + case "bhd", "iqd", "jod", "kwd", "omr", "tnd", "lyd": + return 3 + default: + return 2 + } +} + +func (c *CardRoute) cardDecimals() int { + if c == nil { + return 2 + } + if c.Decimals > 0 { + return c.Decimals + } + return currencyMinorUnits(c.Currency) +} + +func (c *CardRoute) cardCurrency() string { + if c != nil && c.Currency != "" { + return strings.ToLower(c.Currency) + } + return defaultCardCurrency +} + +func (c *CardRoute) cardPaymentMethodTypes() []string { + if c != nil && len(c.PaymentMethodTypes) > 0 { + return c.PaymentMethodTypes + } + return []string{"card"} +} + +// buildCardRequirement builds the 402 accepts[] entry advertising the MPP +// credit-card (Stripe) option for a card route. The Amount is in currency minor +// units (e.g. cents for usd, whole yen for jpy) to match Stripe's PaymentIntent +// API; the human decimal price is mirrored under Extra.request for MPP-aware +// clients that normalize against `decimals`. 
+func buildCardRequirement(rule *RouteRule) x402types.PaymentRequirements { + card := rule.Card + decimals := card.cardDecimals() + currency := card.cardCurrency() + pmt := card.cardPaymentMethodTypes() + amountMinor := decimalToAtomic(rule.Price, decimals) + + return x402types.PaymentRequirements{ + Scheme: cardScheme, + Network: cardNetworkStripe, + Amount: amountMinor, + Asset: "", // no on-chain asset for card settlement + PayTo: card.Account, + MaxTimeoutSeconds: 300, + Extra: map[string]any{ + "method": cardNetworkStripe, + "intent": "charge", + "currency": currency, + "decimals": decimals, + "networkId": card.NetworkID, + "paymentMethodTypes": pmt, + // Mirror the MPP stripe.charge challenge.request so an MPP card + // client can mint a Shared Payment Token against this offer. + "request": map[string]any{ + "amount": rule.Price, + "currency": currency, + "decimals": decimals, + "methodDetails": map[string]any{ + "networkId": card.NetworkID, + "paymentMethodTypes": pmt, + }, + }, + }, + } +} + +// cardCredential is the buyer-supplied card payment payload carried (base64 +// JSON) in the X-PAYMENT header: a Stripe Shared Payment Token plus an optional +// client-side external id for reconciliation. +type cardCredential struct { + SPT string `json:"spt"` + ExternalID string `json:"externalId,omitempty"` +} + +func (c cardCredential) normalize() (cardCredential, error) { + c.SPT = strings.TrimSpace(c.SPT) + if !strings.HasPrefix(c.SPT, "spt_") { + return cardCredential{}, errors.New(`card credential spt must start with "spt_"`) + } + return c, nil +} + +// parseCardCredential decodes the base64 X-PAYMENT card payload. It accepts both +// the bare payload ({spt,externalId}) and an x402-style wrapper ({payload:{...}}). 
+func parseCardCredential(header string) (cardCredential, error) { + raw, err := base64.StdEncoding.DecodeString(strings.TrimSpace(header)) + if err != nil { + return cardCredential{}, fmt.Errorf("invalid card credential base64: %w", err) + } + var direct cardCredential + if err := json.Unmarshal(raw, &direct); err == nil && direct.SPT != "" { + return direct.normalize() + } + var wrapper struct { + Payload cardCredential `json:"payload"` + } + if err := json.Unmarshal(raw, &wrapper); err == nil && wrapper.Payload.SPT != "" { + return wrapper.Payload.normalize() + } + return cardCredential{}, errors.New("card credential missing spt") +} + +// ── SPT replay guard ──────────────────────────────────────────────────────── + +// sptReplayGuard rejects reuse of a Shared Payment Token. A token is reserved +// for the duration of a request and either consumed (kept blocked for the TTL) +// on a captured charge or released (unblocked) when the charge does not land, +// so transient failures can be retried with the same token. +type sptReplayGuard struct { + mu sync.Mutex + seen map[string]time.Time + ttl time.Duration +} + +func newSPTReplayGuard(ttl time.Duration) *sptReplayGuard { + return &sptReplayGuard{seen: make(map[string]time.Time), ttl: ttl} +} + +// tryReserve records the token as in-flight and returns false if it is already +// reserved or recently consumed. +func (g *sptReplayGuard) tryReserve(spt string) bool { + now := time.Now() + g.mu.Lock() + defer g.mu.Unlock() + for k, t := range g.seen { + if now.Sub(t) > g.ttl { + delete(g.seen, k) + } + } + if _, exists := g.seen[spt]; exists { + return false + } + g.seen[spt] = now + return true +} + +// release unblocks a token so it can be retried (charge did not land). +func (g *sptReplayGuard) release(spt string) { + g.mu.Lock() + delete(g.seen, spt) + g.mu.Unlock() +} + +// consume keeps a token blocked for the TTL after a successful capture. 
+func (g *sptReplayGuard) consume(spt string) { + g.mu.Lock() + g.seen[spt] = time.Now() + g.mu.Unlock() +} + +// ── Stripe gateway ────────────────────────────────────────────────────────── + +// cardGateway is the two-phase card settlement seam: authorize holds funds, +// capture takes them after the upstream serves successfully, cancel releases +// the hold on failure. Implementations must be safe to call on the request +// path (card settlement is synchronous and online). +type cardGateway interface { + authorize(ctx context.Context, card *CardRoute, amountMinorUnits, currency string, cred cardCredential) (paymentIntentID string, err error) + capture(ctx context.Context, card *CardRoute, paymentIntentID string) error + cancel(ctx context.Context, card *CardRoute, paymentIntentID string) error +} + +// stripeCardGateway implements cardGateway against the Stripe PaymentIntents +// API (manual capture), adapted from github.com/cp0x-org/mppx/stripe. +type stripeCardGateway struct { + httpClient *http.Client + baseURL string + // secretKey returns the seller's Stripe secret key. + secretKey func() string +} + +func newStripeCardGateway() *stripeCardGateway { + return &stripeCardGateway{ + httpClient: &http.Client{Timeout: cardStripeTimeout}, + baseURL: stripeAPIBase, + secretKey: func() string { return strings.TrimSpace(os.Getenv("STRIPE_SECRET_KEY")) }, + } +} + +// defaultCardGateway / defaultSPTGuard are the package defaults used by +// serveCardGated. Kept as package vars (not Verifier fields) so the card path +// does not disturb the Verifier constructor; serveCardGated takes both so tests +// can inject fakes. +var ( + defaultCardGateway cardGateway = newStripeCardGateway() + defaultSPTGuard = newSPTReplayGuard(sptReplayTTL) +) + +// buildAuthorizeForm is the form body for a manual-capture Stripe PaymentIntent +// create+confirm (the authorization). Split out for unit testing. 
+func buildAuthorizeForm(amountMinorUnits, currency, spt string) url.Values { + form := url.Values{} + form.Set("amount", amountMinorUnits) + form.Set("currency", currency) + form.Set("confirm", "true") + form.Set("capture_method", "manual") + form.Set("shared_payment_granted_token", spt) + form.Set("automatic_payment_methods[enabled]", "true") + form.Set("automatic_payment_methods[allow_redirects]", "never") + return form +} + +func (s *stripeCardGateway) authorize(ctx context.Context, _ *CardRoute, amountMinorUnits, currency string, cred cardCredential) (string, error) { + id, status, err := s.do(ctx, s.baseURL+"/payment_intents", buildAuthorizeForm(amountMinorUnits, currency, cred.SPT), "obol_auth_"+cred.SPT) + if err != nil { + return "", err + } + // Manual capture + confirm: a successful authorization yields + // requires_capture (funds held, not taken). Accept succeeded defensively. + switch status { + case "requires_capture", "succeeded": + return id, nil + case "requires_action": + return "", errors.New("stripe PaymentIntent requires action (3DS) — not supported for machine payments") + default: + return "", fmt.Errorf("stripe authorize status: %s", status) + } +} + +func (s *stripeCardGateway) capture(ctx context.Context, _ *CardRoute, paymentIntentID string) error { + _, status, err := s.do(ctx, s.baseURL+"/payment_intents/"+url.PathEscape(paymentIntentID)+"/capture", url.Values{}, "obol_cap_"+paymentIntentID) + if err != nil { + return err + } + if status != "succeeded" { + return fmt.Errorf("stripe capture status: %s", status) + } + return nil +} + +func (s *stripeCardGateway) cancel(ctx context.Context, _ *CardRoute, paymentIntentID string) error { + _, _, err := s.do(ctx, s.baseURL+"/payment_intents/"+url.PathEscape(paymentIntentID)+"/cancel", url.Values{}, "") + return err +} + +// do issues a form-encoded POST to Stripe and returns the PaymentIntent id and +// status. Stripe uses HTTP Basic with the secret key as the username. 
+func (s *stripeCardGateway) do(ctx context.Context, endpoint string, form url.Values, idempotencyKey string) (id, status string, err error) { + key := s.secretKey() + if key == "" { + return "", "", errors.New("stripe secret key not configured (STRIPE_SECRET_KEY)") + } + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode())) + if err != nil { + return "", "", fmt.Errorf("build stripe request: %w", err) + } + req.Header.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(key+":"))) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + if idempotencyKey != "" { + req.Header.Set("Idempotency-Key", idempotencyKey) + } + + resp, err := s.httpClient.Do(req) + if err != nil { + return "", "", fmt.Errorf("stripe request failed: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { + return "", "", fmt.Errorf("stripe API failed (HTTP %d)", resp.StatusCode) + } + var body struct { + ID string `json:"id"` + Status string `json:"status"` + } + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + return "", "", fmt.Errorf("decode stripe response: %w", err) + } + return body.ID, body.Status, nil +} + +// cardReceiptJSON builds the X-PAYMENT-RESPONSE body surfaced to the buyer after +// a captured card charge. +func cardReceiptJSON(reference string) []byte { + b, err := json.Marshal(map[string]string{ + "method": cardNetworkStripe, + "intent": "charge", + "reference": reference, + }) + if err != nil { + return []byte("{}") + } + return b +} + +func detachedCardContext() (context.Context, context.CancelFunc) { + return context.WithTimeout(context.Background(), cardStripeTimeout) +} + +// cancelCardHold releases an authorized PaymentIntent and logs a failure. 
An +// uncancelled hold auto-expires at Stripe, but a swallowed error would leave no +// operator trail, so cancel failures are logged rather than ignored. +func cancelCardHold(gw cardGateway, rule *RouteRule, paymentIntentID string) { + ctx, cancel := detachedCardContext() + defer cancel() + if err := gw.cancel(ctx, rule.Card, paymentIntentID); err != nil { + log.Printf("x402-card: cancel authorization %s for %s/%s failed: %v", paymentIntentID, rule.OfferNamespace, rule.OfferName, err) + } +} + +// serveCardGated is the in-process seller gate for MPP credit-card offers, +// invoked from Verifier.HandleProxy when the matched route is a card route. It +// authorizes the buyer's SPT, proxies on a successful authorization, then +// captures after a <400 upstream response (cancelling the hold otherwise). Uses +// the JSON 402 (no HTML page). proxy is the already-built upstream handler. +func (v *Verifier) serveCardGated( + w http.ResponseWriter, + r *http.Request, + rule *RouteRule, + requirement x402types.PaymentRequirements, + extensions map[string]any, + proxy http.Handler, + gw cardGateway, + guard *sptReplayGuard, +) { + reqs := []x402types.PaymentRequirements{requirement} + + paymentHeader := r.Header.Get("X-PAYMENT") + if paymentHeader == "" { + sendPaymentRequiredJSON(w, r, reqs, extensions) + return + } + + cred, err := parseCardCredential(paymentHeader) + if err != nil { + log.Printf("x402-card: bad credential for %s/%s: %v", rule.OfferNamespace, rule.OfferName, err) + sendPaymentRequiredJSON(w, r, reqs, extensions) + return + } + + // Replay defense: a Shared Payment Token is single-use. 
+ if !guard.tryReserve(cred.SPT) { + log.Printf("x402-card: replayed SPT rejected for %s/%s", rule.OfferNamespace, rule.OfferName) + sendPaymentRequiredJSON(w, r, reqs, extensions) + return + } + + currency, _ := requirement.Extra["currency"].(string) + + authCtx, cancelAuth := detachedCardContext() + paymentIntentID, err := gw.authorize(authCtx, rule.Card, requirement.Amount, currency, cred) + cancelAuth() + if err != nil { + // Authorization failed — buyer not charged; allow a retry with the SPT. + guard.release(cred.SPT) + log.Printf("x402-card: authorize failed for %s/%s: %v", rule.OfferNamespace, rule.OfferName, err) + sendPaymentRequiredJSON(w, r, reqs, extensions) + return + } + + // Authorized — wire capture-after-success / cancel-on-failure around the + // upstream via the shared settlementInterceptor. + interceptor := &settlementInterceptor{ + w: w, + settleFunc: func() bool { + cctx, cc := detachedCardContext() + defer cc() + if capErr := gw.capture(cctx, rule.Card, paymentIntentID); capErr != nil { + log.Printf("x402-card: capture failed for %s/%s: %v", rule.OfferNamespace, rule.OfferName, capErr) + // Release the authorization hold and unblock the SPT. + cancelCardHold(gw, rule, paymentIntentID) + guard.release(cred.SPT) + http.Error(w, "card capture failed", http.StatusBadGateway) + return false + } + guard.consume(cred.SPT) + w.Header().Set("X-PAYMENT-RESPONSE", base64.StdEncoding.EncodeToString(cardReceiptJSON(paymentIntentID))) + return true + }, + onFailure: func(statusCode int) { + // Upstream failed — cancel the hold; buyer is not charged. + cancelCardHold(gw, rule, paymentIntentID) + guard.release(cred.SPT) + log.Printf("x402-card: upstream returned %d for %s/%s, authorization cancelled", statusCode, rule.OfferNamespace, rule.OfferName) + }, + } + + // Defensive reconcile: settleFunc/onFailure only fire from the + // interceptor's WriteHeader. 
If the upstream handler panics or returns + // without ever writing a response (committed stays false), neither runs — + // cancel the hold and release the SPT so the buyer is not left with funds + // authorized for a request that was never served. Re-panic to preserve the + // server's own panic handling (e.g. http.ErrAbortHandler). + defer func() { + rec := recover() + if !interceptor.committed { + cancelCardHold(gw, rule, paymentIntentID) + guard.release(cred.SPT) + log.Printf("x402-card: upstream produced no response for %s/%s, authorization cancelled", rule.OfferNamespace, rule.OfferName) + } + if rec != nil { + panic(rec) + } + }() + proxy.ServeHTTP(interceptor, r) +} diff --git a/internal/x402/card_test.go b/internal/x402/card_test.go new file mode 100644 index 00000000..e65b4062 --- /dev/null +++ b/internal/x402/card_test.go @@ -0,0 +1,389 @@ +package x402 + +import ( + "context" + "encoding/base64" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" +) + +func cardTestRule() *RouteRule { + return &RouteRule{ + Pattern: "/services/card-foo/*", + Price: "0.01", + OfferNamespace: "default", + OfferName: "card-foo", + Card: &CardRoute{ + Provider: "stripe", + Account: "acct_test123", + Currency: "usd", + Decimals: 2, + NetworkID: "stripenet_abc", + }, + } +} + +func cardCredHeader(spt string) string { + b, _ := json.Marshal(map[string]string{"spt": spt}) + return base64.StdEncoding.EncodeToString(b) +} + +func TestCurrencyMinorUnits(t *testing.T) { + cases := map[string]int{"usd": 2, "USD": 2, "eur": 2, "jpy": 0, "krw": 0, "bhd": 3, "kwd": 3, "zzz": 2, "": 2} + for in, want := range cases { + if got := currencyMinorUnits(in); got != want { + t.Errorf("currencyMinorUnits(%q) = %d, want %d", in, got, want) + } + } +} + +func TestBuildCardRequirement(t *testing.T) { + req := buildCardRequirement(cardTestRule()) + + if req.Scheme != cardScheme || req.Network != cardNetworkStripe { + t.Errorf("scheme/network = 
%q/%q", req.Scheme, req.Network) + } + if req.PayTo != "acct_test123" { + t.Errorf("payTo = %q, want acct_test123", req.PayTo) + } + if req.Amount != "1" { // "0.01" usd (2 decimals) -> 1 cent + t.Errorf("amount = %q, want 1 (minor units)", req.Amount) + } + if req.Asset != "" { + t.Errorf("asset = %q, want empty", req.Asset) + } + if req.Extra["currency"] != "usd" || req.Extra["networkId"] != "stripenet_abc" { + t.Errorf("extra = %v", req.Extra) + } + pmt, ok := req.Extra["paymentMethodTypes"].([]string) + if !ok || len(pmt) != 1 || pmt[0] != "card" { + t.Errorf("extra.paymentMethodTypes = %v, want [card]", req.Extra["paymentMethodTypes"]) + } +} + +func TestBuildCardRequirement_NonTwoDecimalCurrency(t *testing.T) { + rule := &RouteRule{Price: "100", Card: &CardRoute{Account: "acct_x", Currency: "jpy"}} + req := buildCardRequirement(rule) + // jpy has 0 minor-unit decimals: ¥100 -> amount "100". + if req.Amount != "100" { + t.Errorf("jpy amount = %q, want 100", req.Amount) + } + if req.Extra["decimals"] != 0 { + t.Errorf("jpy decimals = %v, want 0", req.Extra["decimals"]) + } +} + +func TestParseCardCredential(t *testing.T) { + b64 := func(v any) string { b, _ := json.Marshal(v); return base64.StdEncoding.EncodeToString(b) } + + t.Run("bare", func(t *testing.T) { + cred, err := parseCardCredential(b64(map[string]string{"spt": "spt_abc", "externalId": "e1"})) + if err != nil || cred.SPT != "spt_abc" || cred.ExternalID != "e1" { + t.Fatalf("got %+v err=%v", cred, err) + } + }) + t.Run("wrapped", func(t *testing.T) { + cred, err := parseCardCredential(b64(map[string]any{"payload": map[string]string{"spt": "spt_xyz"}})) + if err != nil || cred.SPT != "spt_xyz" { + t.Fatalf("got %+v err=%v", cred, err) + } + }) + for _, bad := range []struct{ name, header string }{ + {"bad base64", "!!!"}, + {"missing spt", b64(map[string]string{"externalId": "e1"})}, + {"wrong prefix", b64(map[string]string{"spt": "tok_abc"})}, + } { + t.Run(bad.name, func(t *testing.T) { + if _, err 
:= parseCardCredential(bad.header); err == nil { + t.Errorf("expected error for %s", bad.name) + } + }) + } +} + +func TestBuildAuthorizeForm(t *testing.T) { + form := buildAuthorizeForm("1", "usd", "spt_abc") + want := map[string]string{ + "amount": "1", + "currency": "usd", + "confirm": "true", + "capture_method": "manual", + "shared_payment_granted_token": "spt_abc", + } + for k, v := range want { + if form.Get(k) != v { + t.Errorf("form[%q] = %q, want %q", k, form.Get(k), v) + } + } +} + +func TestSPTReplayGuard(t *testing.T) { + g := newSPTReplayGuard(time.Hour) + if !g.tryReserve("spt_a") { + t.Fatal("first reserve should succeed") + } + if g.tryReserve("spt_a") { + t.Fatal("second reserve of in-flight token must fail") + } + g.release("spt_a") + if !g.tryReserve("spt_a") { + t.Fatal("after release, reserve should succeed again") + } + g.consume("spt_a") + if g.tryReserve("spt_a") { + t.Fatal("consumed token must stay blocked") + } + // TTL expiry: a guard with a 0 TTL forgets immediately. 
+ g0 := newSPTReplayGuard(0) + g0.consume("spt_b") + if !g0.tryReserve("spt_b") { + t.Fatal("token past TTL should be reservable") + } +} + +// ── stripeCardGateway against a mock Stripe server ────────────────────────── + +func TestStripeCardGateway_Lifecycle(t *testing.T) { + var paths []string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + paths = append(paths, r.URL.Path) + if !strings.HasPrefix(r.Header.Get("Authorization"), "Basic ") { + t.Errorf("missing Basic auth on %s", r.URL.Path) + } + _ = r.ParseForm() + w.Header().Set("Content-Type", "application/json") + switch { + case strings.HasSuffix(r.URL.Path, "/capture"): + _, _ = io.WriteString(w, `{"id":"pi_x","status":"succeeded"}`) + case strings.HasSuffix(r.URL.Path, "/cancel"): + _, _ = io.WriteString(w, `{"id":"pi_x","status":"canceled"}`) + default: // authorize + if r.FormValue("capture_method") != "manual" { + t.Errorf("authorize capture_method = %q, want manual", r.FormValue("capture_method")) + } + if r.FormValue("shared_payment_granted_token") != "spt_live" { + t.Errorf("authorize spt = %q", r.FormValue("shared_payment_granted_token")) + } + _, _ = io.WriteString(w, `{"id":"pi_x","status":"requires_capture"}`) + } + })) + defer srv.Close() + + gw := &stripeCardGateway{httpClient: srv.Client(), baseURL: srv.URL, secretKey: func() string { return "sk_test" }} + ctx := context.Background() + + id, err := gw.authorize(ctx, nil, "100", "usd", cardCredential{SPT: "spt_live"}) + if err != nil || id != "pi_x" { + t.Fatalf("authorize id=%q err=%v", id, err) + } + if err := gw.capture(ctx, nil, id); err != nil { + t.Fatalf("capture: %v", err) + } + if err := gw.cancel(ctx, nil, id); err != nil { + t.Fatalf("cancel: %v", err) + } + if len(paths) != 3 { + t.Errorf("expected 3 Stripe calls, got %v", paths) + } +} + +func TestStripeCardGateway_NoKey(t *testing.T) { + gw := &stripeCardGateway{httpClient: http.DefaultClient, baseURL: stripeAPIBase, secretKey: func() 
string { return "" }} + if _, err := gw.authorize(context.Background(), nil, "1", "usd", cardCredential{SPT: "spt_a"}); err == nil { + t.Fatal("expected error when secret key unset") + } +} + +func TestStripeCardGateway_AuthorizeRequiresAction(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = io.WriteString(w, `{"id":"pi_y","status":"requires_action"}`) + })) + defer srv.Close() + gw := &stripeCardGateway{httpClient: srv.Client(), baseURL: srv.URL, secretKey: func() string { return "sk_test" }} + if _, err := gw.authorize(context.Background(), nil, "1", "usd", cardCredential{SPT: "spt_a"}); err == nil { + t.Fatal("requires_action must be an error (3DS not supported)") + } +} + +// ── serveCardGated with a fake gateway ────────────────────────────────────── + +type fakeGateway struct { + mu sync.Mutex + authErr error + capErr error + authCalls int + captured []string + canceled []string + pi string +} + +func (f *fakeGateway) authorize(_ context.Context, _ *CardRoute, _, _ string, _ cardCredential) (string, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.authCalls++ + if f.authErr != nil { + return "", f.authErr + } + return f.pi, nil +} + +func (f *fakeGateway) capture(_ context.Context, _ *CardRoute, pi string) error { + f.mu.Lock() + defer f.mu.Unlock() + if f.capErr != nil { + return f.capErr + } + f.captured = append(f.captured, pi) + return nil +} + +func (f *fakeGateway) cancel(_ context.Context, _ *CardRoute, pi string) error { + f.mu.Lock() + defer f.mu.Unlock() + f.canceled = append(f.canceled, pi) + return nil +} + +func okProxy() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = io.WriteString(w, "upstream-ok") + }) +} + +func failProxy() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + }) +} + +func gateOnce(gw 
cardGateway, guard *sptReplayGuard, sptHeader string, proxy http.Handler) *httptest.ResponseRecorder { + rule := cardTestRule() + req := buildCardRequirement(rule) + r := httptest.NewRequest(http.MethodPost, "/services/card-foo/x", nil) + if sptHeader != "" { + r.Header.Set("X-PAYMENT", sptHeader) + } + w := httptest.NewRecorder() + (&Verifier{}).serveCardGated(w, r, rule, req, nil, proxy, gw, guard) + return w +} + +func TestServeCardGated_NoPayment402(t *testing.T) { + gw := &fakeGateway{pi: "pi_1"} + w := gateOnce(gw, newSPTReplayGuard(time.Hour), "", okProxy()) + if w.Code != http.StatusPaymentRequired { + t.Fatalf("status = %d, want 402", w.Code) + } + if gw.authCalls != 0 { + t.Error("authorize must not be called without a credential") + } +} + +func TestServeCardGated_PaidAuthorizeCaptureProxy(t *testing.T) { + gw := &fakeGateway{pi: "pi_1"} + guard := newSPTReplayGuard(time.Hour) + w := gateOnce(gw, guard, cardCredHeader("spt_a"), okProxy()) + + if w.Code != http.StatusOK || w.Body.String() != "upstream-ok" { + t.Fatalf("status=%d body=%q", w.Code, w.Body.String()) + } + if gw.authCalls != 1 || len(gw.captured) != 1 || gw.captured[0] != "pi_1" { + t.Fatalf("auth=%d captured=%v", gw.authCalls, gw.captured) + } + if len(gw.canceled) != 0 { + t.Errorf("must not cancel on success: %v", gw.canceled) + } + hdr := w.Header().Get("X-PAYMENT-RESPONSE") + dec, _ := base64.StdEncoding.DecodeString(hdr) + var receipt map[string]string + _ = json.Unmarshal(dec, &receipt) + if receipt["reference"] != "pi_1" { + t.Errorf("receipt = %v, want reference pi_1", receipt) + } + // SPT now consumed: a replay is rejected and does not re-authorize. 
+ w2 := gateOnce(gw, guard, cardCredHeader("spt_a"), okProxy()) + if w2.Code != http.StatusPaymentRequired { + t.Errorf("replay status = %d, want 402", w2.Code) + } + if gw.authCalls != 1 { + t.Errorf("replay must not re-authorize: authCalls=%d", gw.authCalls) + } +} + +func TestServeCardGated_AuthorizeFailure402(t *testing.T) { + gw := &fakeGateway{authErr: io.ErrUnexpectedEOF} + guard := newSPTReplayGuard(time.Hour) + w := gateOnce(gw, guard, cardCredHeader("spt_a"), okProxy()) + if w.Code != http.StatusPaymentRequired { + t.Fatalf("status = %d, want 402", w.Code) + } + if len(gw.captured) != 0 { + t.Error("must not capture when authorize fails") + } + // Authorization failure releases the SPT for retry. + if !guard.tryReserve("spt_a") { + t.Error("SPT should be released after authorize failure") + } +} + +func TestServeCardGated_UpstreamFailureCancels(t *testing.T) { + gw := &fakeGateway{pi: "pi_2"} + guard := newSPTReplayGuard(time.Hour) + w := gateOnce(gw, guard, cardCredHeader("spt_a"), failProxy()) + if w.Code != http.StatusInternalServerError { + t.Fatalf("status = %d, want 500 passthrough", w.Code) + } + if len(gw.captured) != 0 { + t.Errorf("must not capture on upstream failure: %v", gw.captured) + } + if len(gw.canceled) != 1 || gw.canceled[0] != "pi_2" { + t.Errorf("must cancel authorization on upstream failure: %v", gw.canceled) + } + if !guard.tryReserve("spt_a") { + t.Error("SPT should be released after upstream failure") + } +} + +func TestServeCardGated_UpstreamPanicCancels(t *testing.T) { + gw := &fakeGateway{pi: "pi_panic"} + guard := newSPTReplayGuard(time.Hour) + panicProxy := http.HandlerFunc(func(http.ResponseWriter, *http.Request) { panic("upstream blew up") }) + + // serveCardGated re-panics to preserve server panic handling; recover here. 
+ func() { + defer func() { _ = recover() }() + gateOnce(gw, guard, cardCredHeader("spt_a"), panicProxy) + }() + + if len(gw.captured) != 0 { + t.Errorf("must not capture when upstream panics: %v", gw.captured) + } + if len(gw.canceled) != 1 || gw.canceled[0] != "pi_panic" { + t.Errorf("panic must cancel the authorization hold: %v", gw.canceled) + } + if !guard.tryReserve("spt_a") { + t.Error("SPT should be released after a panic") + } +} + +func TestServeCardGated_CaptureFailure(t *testing.T) { + gw := &fakeGateway{pi: "pi_3", capErr: io.ErrUnexpectedEOF} + guard := newSPTReplayGuard(time.Hour) + w := gateOnce(gw, guard, cardCredHeader("spt_a"), okProxy()) + if w.Code != http.StatusBadGateway { + t.Fatalf("status = %d, want 502 on capture failure", w.Code) + } + if len(gw.canceled) != 1 || gw.canceled[0] != "pi_3" { + t.Errorf("capture failure must cancel the hold: %v", gw.canceled) + } + if !guard.tryReserve("spt_a") { + t.Error("SPT should be released after capture failure") + } +} diff --git a/internal/x402/config.go b/internal/x402/config.go index e6ac3f4c..19c528f4 100644 --- a/internal/x402/config.go +++ b/internal/x402/config.go @@ -117,6 +117,20 @@ type RouteRule struct { // Surfaced as `accepts[].extra.agentRuntime`. AgentRuntime string `yaml:"agentRuntime,omitempty"` + // SkillName is the skill bundle identifier for type=skill offers. + // Surfaced as `accepts[].extra.skill.name` in the 402 response so + // buyers see which artifact they are paying to download. + SkillName string `yaml:"skillName,omitempty"` + + // SkillVersion is the skill bundle version (e.g. "0.1.0"). Surfaced + // as `accepts[].extra.skill.version`. + SkillVersion string `yaml:"skillVersion,omitempty"` + + // SkillSHA256 is the lowercase hex sha256 of the gzipped bundle bytes. + // Surfaced as `accepts[].extra.skill.sha256` so buyers can verify the + // downloaded artifact offline against what the 402 advertised. 
+ SkillSHA256 string `yaml:"skillSha256,omitempty"` + // OfferType records the originating ServiceOffer.spec.type // (inference, http, agent, fine-tuning). The HTML 402 renderer uses // this to pick type-appropriate copy and Buy CTAs. @@ -145,6 +159,33 @@ type RouteRule struct { // minutes-to-hours here — operator-set values up to // MaxMaxTimeoutSeconds are honored verbatim. MaxTimeoutSeconds int64 `yaml:"maxTimeoutSeconds,omitempty"` + + // Card, when non-nil, marks this route as gated by the MPP credit-card + // method (Stripe stripe.charge) instead of x402 on-chain settlement. + // Mirrors ServiceOffer.spec.payment.card. SPIKE: the serviceoffer route + // source does not yet populate this from the CRD — see card.go. + Card *CardRoute `yaml:"card,omitempty"` +} + +// CardRoute carries the per-route MPP credit-card (Stripe) terms used when +// RouteRule.Card is non-nil. It is the card-method analog of the +// PayTo/Network/Asset fields above. +type CardRoute struct { + // Provider is the card payment provider (only "stripe" today). + Provider string `yaml:"provider,omitempty"` + // Account is the Stripe destination account id (acct_...) that receives + // settled funds — the card analog of PayTo. + Account string `yaml:"account,omitempty"` + // Currency is the ISO-4217 charge currency (e.g. "usd"). + Currency string `yaml:"currency,omitempty"` + // Decimals is the currency's minor-unit precision (2 for usd/eur). + Decimals int `yaml:"decimals,omitempty"` + // NetworkID is the Stripe "machine payments" Business Network id, + // advertised in the 402 challenge so MPP clients can mint an SPT. + NetworkID string `yaml:"networkId,omitempty"` + // PaymentMethodTypes are the accepted Stripe payment-method types, + // advertised in the challenge (defaults to ["card"]). + PaymentMethodTypes []string `yaml:"paymentMethodTypes,omitempty"` } // LoadConfig reads and parses a pricing configuration YAML file. 
diff --git a/internal/x402/escrow/gateway.go b/internal/x402/escrow/gateway.go new file mode 100644 index 00000000..040ebd81 --- /dev/null +++ b/internal/x402/escrow/gateway.go @@ -0,0 +1,264 @@ +// Package escrow defines the conditional-settlement seam between the +// servicebounty-controller and the x402 facilitator: hold a reward +// authorization now, release it to the fulfiller on an accepted verdict, or +// return it to the poster on expiry/rejection. +// +// The controller is only a bounded release TRIGGER, never a signer: the +// poster's agent pre-signs the upto authorization (payTo is signed into it, so +// whoever triggers settlement can only release the signed transfer to the +// signed recipient — or nothing). The facilitator holds the auth and performs +// settlement; the controller authenticates to it with a bearer token. This +// preserves the "controller holds no keys" invariant exactly as today. +// +// Two implementations: +// - HTTPGateway: POSTs to the facilitator's /escrow/{reserve,capture,void} +// routes (the ConditionalSettleFacilitator wrapper around x402-rs — the +// next slice on the facilitator side). +// - LedgerGateway: in-memory dev mode for local-first stacks and tests. It +// is escrow THEATER — nothing is held anywhere — and every receipt is +// labeled dev-ledger so it can never be mistaken for settlement. +package escrow + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" +) + +// States reported by a Gateway. They feed ServiceBountyStatus.EscrowState. +const ( + StateReserved = "Reserved" + StateCaptured = "Captured" + StateVoided = "Voided" +) + +// Permit2Voucher is a Uniswap Permit2 SignatureTransfer +// PermitBatchTransferFrom authorization signed by Owner, executable only by +// Spender (the escrow facilitator), with the recipients declared at signing +// time. 
// Permit2Voucher is a Uniswap Permit2 SignatureTransfer
// PermitBatchTransferFrom authorization signed by Owner and executable only
// by Spender (the escrow facilitator).
//
// The EIP-712 message built by VoucherTypedData covers one (token, amount)
// seat per recipient, the spender, the nonce and the deadline, and
// VerifyVoucher requires the signature to recover to Owner. Recipient
// ADDRESSES are not part of that message: changing Recipients[i].Address
// leaves the digest unchanged, so which address receives each signed amount
// is not enforced by the signature itself.
type Permit2Voucher struct {
	// Owner is the signer whose funds the voucher moves.
	Owner string `json:"owner"`
	// Token is the ERC-20 token contract address.
	Token string `json:"token"`
	// Network is the chain alias the voucher settles on (e.g. base-sepolia).
	Network string `json:"network"`
	// Spender is the only address allowed to execute the transfer — the
	// escrow facilitator.
	Spender string `json:"spender"`
	// Nonce is the Permit2 unordered nonce as a uint256 decimal string.
	Nonce string `json:"nonce"`
	// Deadline is the unix timestamp the voucher expires at.
	Deadline int64 `json:"deadline"`
	// Recipients are the payout seats, amounts in atomic token units — one
	// permitted entry per recipient seat. Only the amounts (paired with
	// Token) enter the signed message; see the type comment.
	Recipients []BatchRecipient `json:"recipients"`
	// Signature is the 0x-hex 65-byte EIP-712 signature over the permit.
	Signature string `json:"signature"`
}

// ReserveRequest identifies the reward authorization the facilitator should
// verify and hold for a bounty.
type ReserveRequest struct {
	// ID is the stable escrow key — the ServiceBounty UID. HTTPGateway uses
	// it as the last path segment of the escrow route.
	ID string `json:"id"`
	// Network, PayTo, Asset, Amount describe the reward leg (PayTo is the
	// poster's refund address; the fulfiller payout address is bound in the
	// pre-signed auth itself at claim time).
	Network string `json:"network"`
	PayTo   string `json:"payTo"`
	Asset   string `json:"asset"`
	Amount  string `json:"amount"`
	// Scheme is the x402 settlement scheme (upto today, authCapture later).
	Scheme string `json:"scheme"`
	// Voucher is the optional Permit2 batch-transfer authorization backing
	// the reservation (real escrow). Gateways that hold nothing (the dev
	// ledger) may ignore it.
	Voucher *Permit2Voucher `json:"voucher,omitempty"`
}

// Receipt is the gateway's record of an escrow operation.
type Receipt struct {
	// State is one of StateReserved, StateCaptured or StateVoided.
	State string `json:"state"`
	// TxHash identifies the operation. LedgerGateway fills it with a
	// "dev-ledger:"-prefixed label rather than a transaction hash.
	TxHash string `json:"txHash,omitempty"`
	// Spender is the facilitator address vouchers must name as the only
	// executor; surfaced so signers can bind it before reserving.
	Spender string `json:"spender,omitempty"`
}

// Gateway is the Hold/Release/Refund seam. Implementations must be safe for
// concurrent use by reconcile workers.
type Gateway interface {
	// Reserve verifies + holds the reward auth for id. Idempotent.
	Reserve(ctx context.Context, req ReserveRequest) (Receipt, error)
	// Capture settles the held auth to the fulfiller. Idempotent: capturing
	// an already-captured id returns the original receipt.
	Capture(ctx context.Context, id string) (Receipt, error)
	// Void drops the held auth (poster keeps funds). Voiding an unknown or
	// already-voided id is not an error — refund flows must be re-runnable.
	Void(ctx context.Context, id string) (Receipt, error)
}

// BatchRecipient is one payee of a split capture (the eval-payment leg: each
// revealed counting evaluator gets perEvaluator from the held budget).
type BatchRecipient struct {
	// Address is the payee address.
	Address string `json:"address"`
	// Amount is the payee's share as a decimal string.
	Amount string `json:"amount"`
}

// BatchGateway is the x402 batch-settlement seam: one held authorization
// captured to k recipients in one settlement (the eval budget → evaluators).
// Optional — callers type-assert and fall back to plain Capture.
type BatchGateway interface {
	// CaptureBatch settles the held auth for id split across recipients.
	CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error)
}
+ Base string + // Token authenticates capture/void (the release-authority credential). + Token string + Client *http.Client +} + +func (g *HTTPGateway) Reserve(ctx context.Context, req ReserveRequest) (Receipt, error) { + return g.post(ctx, "reserve", req.ID, req) +} + +func (g *HTTPGateway) Capture(ctx context.Context, id string) (Receipt, error) { + return g.post(ctx, "capture", id, nil) +} + +func (g *HTTPGateway) Void(ctx context.Context, id string) (Receipt, error) { + return g.post(ctx, "void", id, nil) +} + +// CaptureBatch drives the facilitator's batch-settlement capture: the held +// auth for id is settled to all recipients in one transaction. +func (g *HTTPGateway) CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error) { + return g.post(ctx, "capture", id, map[string]any{"recipients": recipients}) +} + +func (g *HTTPGateway) post(ctx context.Context, op, id string, body any) (Receipt, error) { + var payload io.Reader + if body != nil { + raw, err := json.Marshal(body) + if err != nil { + return Receipt{}, err + } + payload = bytes.NewReader(raw) + } + + url := strings.TrimRight(g.Base, "/") + "/escrow/" + op + "/" + id + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, payload) + if err != nil { + return Receipt{}, err + } + req.Header.Set("Content-Type", "application/json") + if g.Token != "" { + req.Header.Set("Authorization", "Bearer "+g.Token) + } + + client := g.Client + if client == nil { + client = http.DefaultClient + } + resp, err := client.Do(req) + if err != nil { + return Receipt{}, fmt.Errorf("escrow %s %s: %w", op, id, err) + } + defer resp.Body.Close() + + raw, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<16)) + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return Receipt{}, fmt.Errorf("escrow %s %s: facilitator returned %d: %s", op, id, resp.StatusCode, strings.TrimSpace(string(raw))) + } + + var receipt Receipt + if err := json.Unmarshal(raw, &receipt); err != nil { + return 
Receipt{}, fmt.Errorf("escrow %s %s: decode receipt: %w", op, id, err) + } + return receipt, nil +} + +// ── LedgerGateway (dev) ───────────────────────────────────────────────────── + +// LedgerGateway records escrow state in memory. Local-first dev mode only — +// no funds are verified or held anywhere. Receipts carry a dev-ledger TxHash +// so downstream surfaces can never present them as settlement. +type LedgerGateway struct { + mu sync.Mutex + states map[string]Receipt +} + +func NewLedgerGateway() *LedgerGateway { + return &LedgerGateway{states: make(map[string]Receipt)} +} + +func (g *LedgerGateway) Reserve(_ context.Context, req ReserveRequest) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + if r, ok := g.states[req.ID]; ok { + return r, nil + } + r := Receipt{State: StateReserved, TxHash: "dev-ledger:" + req.ID} + g.states[req.ID] = r + return r, nil +} + +func (g *LedgerGateway) Capture(_ context.Context, id string) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + r, ok := g.states[id] + if !ok { + return Receipt{}, fmt.Errorf("escrow capture %s: nothing reserved", id) + } + if r.State == StateVoided { + return Receipt{}, fmt.Errorf("escrow capture %s: already voided", id) + } + r.State = StateCaptured + g.states[id] = r + return r, nil +} + +// CaptureBatch marks the held budget captured with a dev-ledger receipt +// naming the recipient count — escrow theater, honestly labeled, like the +// rest of the ledger. 
+func (g *LedgerGateway) CaptureBatch(ctx context.Context, id string, recipients []BatchRecipient) (Receipt, error) { + r, err := g.Capture(ctx, id) + if err != nil { + return Receipt{}, err + } + r.TxHash = fmt.Sprintf("dev-ledger:%s:batch[%d]", id, len(recipients)) + g.mu.Lock() + g.states[id] = r + g.mu.Unlock() + return r, nil +} + +func (g *LedgerGateway) Void(_ context.Context, id string) (Receipt, error) { + g.mu.Lock() + defer g.mu.Unlock() + r, ok := g.states[id] + if !ok { + // Re-runnable refunds: voiding the unknown is a no-op success. + return Receipt{State: StateVoided, TxHash: "dev-ledger:" + id}, nil + } + if r.State == StateCaptured { + return Receipt{}, fmt.Errorf("escrow void %s: already captured", id) + } + r.State = StateVoided + g.states[id] = r + return r, nil +} diff --git a/internal/x402/escrow/gateway_test.go b/internal/x402/escrow/gateway_test.go new file mode 100644 index 00000000..7525a3fe --- /dev/null +++ b/internal/x402/escrow/gateway_test.go @@ -0,0 +1,105 @@ +package escrow + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestLedgerGateway_Lifecycle(t *testing.T) { + g := NewLedgerGateway() + ctx := context.Background() + + r, err := g.Reserve(ctx, ReserveRequest{ID: "b1", Amount: "500.00"}) + if err != nil || r.State != StateReserved { + t.Fatalf("Reserve = %+v, %v", r, err) + } + if !strings.HasPrefix(r.TxHash, "dev-ledger:") { + t.Fatalf("ledger receipt %q must be labeled dev-ledger", r.TxHash) + } + + // Reserve is idempotent. + again, err := g.Reserve(ctx, ReserveRequest{ID: "b1"}) + if err != nil || again.State != StateReserved { + t.Fatalf("re-Reserve = %+v, %v", again, err) + } + + c, err := g.Capture(ctx, "b1") + if err != nil || c.State != StateCaptured { + t.Fatalf("Capture = %+v, %v", c, err) + } + + // Captured escrow cannot be voided (the reward was legitimately paid). 
+ if _, err := g.Void(ctx, "b1"); err == nil { + t.Fatal("Void after Capture should error") + } +} + +func TestLedgerGateway_VoidPaths(t *testing.T) { + g := NewLedgerGateway() + ctx := context.Background() + + // Voiding the unknown is a no-op success (refunds must be re-runnable). + if r, err := g.Void(ctx, "ghost"); err != nil || r.State != StateVoided { + t.Fatalf("Void(unknown) = %+v, %v", r, err) + } + + if _, err := g.Reserve(ctx, ReserveRequest{ID: "b2"}); err != nil { + t.Fatal(err) + } + if r, err := g.Void(ctx, "b2"); err != nil || r.State != StateVoided { + t.Fatalf("Void(reserved) = %+v, %v", r, err) + } + // Capturing a voided escrow fails. + if _, err := g.Capture(ctx, "b2"); err == nil { + t.Fatal("Capture after Void should error") + } +} + +func TestHTTPGateway_RoutesAndAuth(t *testing.T) { + var gotPath, gotAuth string + var gotBody ReserveRequest + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + if r.Body != nil { + _ = json.NewDecoder(r.Body).Decode(&gotBody) + } + _ = json.NewEncoder(w).Encode(Receipt{State: StateReserved, TxHash: "0xabc"}) + })) + defer server.Close() + + g := &HTTPGateway{Base: server.URL + "/", Token: "secret", Client: server.Client()} + r, err := g.Reserve(context.Background(), ReserveRequest{ID: "b3", Network: "base", Amount: "10"}) + if err != nil { + t.Fatalf("Reserve: %v", err) + } + if gotPath != "/escrow/reserve/b3" { + t.Errorf("path = %q, want /escrow/reserve/b3", gotPath) + } + if gotAuth != "Bearer secret" { + t.Errorf("auth = %q, want bearer token", gotAuth) + } + if gotBody.Network != "base" || gotBody.Amount != "10" { + t.Errorf("body = %+v", gotBody) + } + if r.State != StateReserved || r.TxHash != "0xabc" { + t.Errorf("receipt = %+v", r) + } +} + +func TestHTTPGateway_SurfacesFacilitatorErrors(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ 
*http.Request) { + http.Error(w, "no such escrow", http.StatusNotFound) + })) + defer server.Close() + + g := &HTTPGateway{Base: server.URL, Client: server.Client()} + if _, err := g.Capture(context.Background(), "missing"); err == nil || !strings.Contains(err.Error(), "404") { + t.Fatalf("Capture error = %v, want 404 surfaced", err) + } +} diff --git a/internal/x402/escrow/permit2.go b/internal/x402/escrow/permit2.go new file mode 100644 index 00000000..70ba88b2 --- /dev/null +++ b/internal/x402/escrow/permit2.go @@ -0,0 +1,280 @@ +package escrow + +import ( + "crypto/ecdsa" + "fmt" + "math/big" + "strconv" + "strings" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/signer/core/apitypes" + + "github.com/ObolNetwork/obol-stack/internal/erc8004" + "github.com/ObolNetwork/obol-stack/internal/x402" +) + +// Permit2Address is the canonical Uniswap Permit2 deployment, CREATE2-deployed +// at the same address on every EVM chain. +const Permit2Address = "0x000000000022D473030F116dDEE9F6B43aC78BA3" + +// ChainIDForNetwork resolves a chain alias ("base-sepolia") or CAIP-2 id +// ("eip155:84532") to its EIP-155 chain ID. Aliases route through the x402 +// chain registry so both legacy and CAIP-2 forms work (see pitfall: both +// forms must resolve or paid routes silently 404). 
+func ChainIDForNetwork(network string) (*big.Int, error) { + n := strings.TrimSpace(network) + if n == "" { + return nil, fmt.Errorf("permit2: empty network") + } + if rest, ok := strings.CutPrefix(strings.ToLower(n), "eip155:"); ok { + id, parsed := new(big.Int).SetString(rest, 10) + if !parsed || id.Sign() <= 0 { + return nil, fmt.Errorf("permit2: invalid CAIP-2 network %q", network) + } + return id, nil + } + info, err := x402.ResolveChainInfo(n) + if err != nil { + return nil, fmt.Errorf("permit2: resolve network %q: %w", network, err) + } + rest := strings.TrimPrefix(info.CAIP2Network, "eip155:") + id, parsed := new(big.Int).SetString(rest, 10) + if !parsed || id.Sign() <= 0 { + return nil, fmt.Errorf("permit2: chain registry returned invalid CAIP-2 id %q for %q", info.CAIP2Network, network) + } + return id, nil +} + +// voucherTypes is the EIP-712 type set for Permit2 SignatureTransfer +// PermitBatchTransferFrom. The domain deliberately has NO version field — +// Permit2's EIP712.sol hashes +// keccak256("EIP712Domain(string name,uint256 chainId,address verifyingContract)") +// with name "Permit2" and nothing else. +func voucherTypes() apitypes.Types { + return apitypes.Types{ + "EIP712Domain": { + {Name: "name", Type: "string"}, + {Name: "chainId", Type: "uint256"}, + {Name: "verifyingContract", Type: "address"}, + }, + "PermitBatchTransferFrom": { + {Name: "permitted", Type: "TokenPermissions[]"}, + {Name: "spender", Type: "address"}, + {Name: "nonce", Type: "uint256"}, + {Name: "deadline", Type: "uint256"}, + }, + "TokenPermissions": { + {Name: "token", Type: "address"}, + {Name: "amount", Type: "uint256"}, + }, + } +} + +// parseUint256 parses a non-negative decimal uint256 string. 
+func parseUint256(field, s string) (*big.Int, error) { + v, ok := new(big.Int).SetString(strings.TrimSpace(s), 10) + if !ok || v.Sign() < 0 || v.BitLen() > 256 { + return nil, fmt.Errorf("permit2: %s %q is not a decimal uint256", field, s) + } + return v, nil +} + +// parsePositiveAmount parses a strictly positive decimal uint256 amount. +func parsePositiveAmount(field, s string) (*big.Int, error) { + v, err := parseUint256(field, s) + if err != nil { + return nil, err + } + if v.Sign() <= 0 { + return nil, fmt.Errorf("permit2: %s %q must be a positive integer", field, s) + } + return v, nil +} + +// validateVoucherFields checks every field needed to hash the voucher (it does +// NOT check signature or deadline-in-future — that is VerifyVoucher's job). +func validateVoucherFields(v Permit2Voucher) error { + if !common.IsHexAddress(v.Owner) { + return fmt.Errorf("permit2: invalid owner address %q", v.Owner) + } + if !common.IsHexAddress(v.Token) { + return fmt.Errorf("permit2: invalid token address %q", v.Token) + } + if !common.IsHexAddress(v.Spender) { + return fmt.Errorf("permit2: invalid spender address %q", v.Spender) + } + if _, err := parseUint256("nonce", v.Nonce); err != nil { + return err + } + if v.Deadline <= 0 { + return fmt.Errorf("permit2: deadline %d must be a positive unix timestamp", v.Deadline) + } + if len(v.Recipients) == 0 { + return fmt.Errorf("permit2: voucher has no recipients") + } + for i, r := range v.Recipients { + if !common.IsHexAddress(r.Address) { + return fmt.Errorf("permit2: recipient %d has invalid address %q", i, r.Address) + } + if _, err := parsePositiveAmount(fmt.Sprintf("recipient %d amount", i), r.Amount); err != nil { + return err + } + } + return nil +} + +// VoucherTypedData builds the EIP-712 payload for a voucher in both the +// go-ethereum apitypes form (canonical digest, local signing) and the +// erc8004.EIP712TypedData form (RemoteSigner.SignTypedData wire payload). 
+// permitted[i] pairs with v.Recipients[i]: one TokenPermissions entry per +// recipient seat, all on the same token. The chainId in the remote-signer +// domain is a decimal string (math.HexOrDecimal256 accepts both forms). +func VoucherTypedData(v Permit2Voucher, chainID *big.Int) (apitypes.TypedData, erc8004.EIP712TypedData, error) { + if chainID == nil || chainID.Sign() <= 0 { + return apitypes.TypedData{}, erc8004.EIP712TypedData{}, fmt.Errorf("permit2: chainID must be positive") + } + if err := validateVoucherFields(v); err != nil { + return apitypes.TypedData{}, erc8004.EIP712TypedData{}, err + } + + token := common.HexToAddress(v.Token).Hex() + permitted := make([]interface{}, len(v.Recipients)) + for i, r := range v.Recipients { + permitted[i] = map[string]interface{}{ + "token": token, + "amount": r.Amount, + } + } + message := map[string]interface{}{ + "permitted": permitted, + "spender": common.HexToAddress(v.Spender).Hex(), + "nonce": v.Nonce, + "deadline": strconv.FormatInt(v.Deadline, 10), + } + + typed := apitypes.TypedData{ + Types: voucherTypes(), + PrimaryType: "PermitBatchTransferFrom", + Domain: apitypes.TypedDataDomain{ + Name: "Permit2", + ChainId: (*math.HexOrDecimal256)(new(big.Int).Set(chainID)), + VerifyingContract: Permit2Address, + }, + Message: message, + } + + remoteTypes := make(map[string][]erc8004.EIP712Field, len(typed.Types)) + for name, fields := range typed.Types { + converted := make([]erc8004.EIP712Field, len(fields)) + for i, f := range fields { + converted[i] = erc8004.EIP712Field{Name: f.Name, Type: f.Type} + } + remoteTypes[name] = converted + } + remote := erc8004.EIP712TypedData{ + Types: remoteTypes, + PrimaryType: "PermitBatchTransferFrom", + Domain: map[string]interface{}{ + "name": "Permit2", + "chainId": chainID.String(), + "verifyingContract": Permit2Address, + }, + Message: message, + } + return typed, remote, nil +} + +// HashVoucher returns the canonical EIP-712 digest the voucher signature +// commits to. 
+func HashVoucher(v Permit2Voucher, chainID *big.Int) (common.Hash, error) { + typed, _, err := VoucherTypedData(v, chainID) + if err != nil { + return common.Hash{}, err + } + digest, _, err := apitypes.TypedDataAndHash(typed) + if err != nil { + return common.Hash{}, fmt.Errorf("permit2: hash typed data: %w", err) + } + return common.BytesToHash(digest), nil +} + +// SignVoucher signs the voucher with key and fills v.Signature (65-byte +// 0x-hex, v in Ethereum 27/28 convention). When v.Owner is empty it is filled +// from the key; when set it must match the key's address. +func SignVoucher(v *Permit2Voucher, chainID *big.Int, key *ecdsa.PrivateKey) error { + if v == nil { + return fmt.Errorf("permit2: nil voucher") + } + if key == nil { + return fmt.Errorf("permit2: nil signing key") + } + addr := crypto.PubkeyToAddress(key.PublicKey) + if v.Owner == "" { + v.Owner = addr.Hex() + } else if common.HexToAddress(v.Owner) != addr { + return fmt.Errorf("permit2: voucher owner %s does not match signing key %s", v.Owner, addr.Hex()) + } + + hash, err := HashVoucher(*v, chainID) + if err != nil { + return err + } + sig, err := crypto.Sign(hash.Bytes(), key) + if err != nil { + return fmt.Errorf("permit2: sign voucher: %w", err) + } + sig[64] += 27 + v.Signature = hexutil.Encode(sig) + return nil +} + +// VerifyVoucher checks that the voucher is internally valid (addresses parse, +// every amount is a positive integer), names expectedSpender as the only +// executor, has not expired, and carries a signature that recovers to Owner. 
+func VerifyVoucher(v Permit2Voucher, chainID *big.Int, expectedSpender common.Address) error { + if err := validateVoucherFields(v); err != nil { + return err + } + if expectedSpender == (common.Address{}) { + return fmt.Errorf("permit2: expected spender is unset") + } + if common.HexToAddress(v.Spender) != expectedSpender { + return fmt.Errorf("permit2: voucher spender %s is not the facilitator %s", v.Spender, expectedSpender.Hex()) + } + if v.Deadline <= time.Now().Unix() { + return fmt.Errorf("permit2: voucher expired at %d", v.Deadline) + } + + sig, err := hexutil.Decode(v.Signature) + if err != nil { + return fmt.Errorf("permit2: decode signature: %w", err) + } + if len(sig) != 65 { + return fmt.Errorf("permit2: signature must be 65 bytes, got %d", len(sig)) + } + // Normalize Ethereum 27/28 recovery id to 0/1 for SigToPub. + recoverable := make([]byte, 65) + copy(recoverable, sig) + if recoverable[64] >= 27 { + recoverable[64] -= 27 + } + + hash, err := HashVoucher(v, chainID) + if err != nil { + return err + } + pub, err := crypto.SigToPub(hash.Bytes(), recoverable) + if err != nil { + return fmt.Errorf("permit2: recover signer: %w", err) + } + recovered := crypto.PubkeyToAddress(*pub) + if recovered != common.HexToAddress(v.Owner) { + return fmt.Errorf("permit2: signature recovers to %s, voucher owner is %s", recovered.Hex(), v.Owner) + } + return nil +} diff --git a/internal/x402/escrow/permit2_test.go b/internal/x402/escrow/permit2_test.go new file mode 100644 index 00000000..8e2a1c8e --- /dev/null +++ b/internal/x402/escrow/permit2_test.go @@ -0,0 +1,325 @@ +package escrow + +import ( + "crypto/ecdsa" + "math/big" + "strings" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// Anvil dev key 0 — fixed so signatures and digests are deterministic. 
+const anvilKey0 = "ac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" + +var testSpender = common.HexToAddress("0x70997970C51812dc3A010C7d01b50e0d17dc79C8") // anvil #1 + +func testKey(t *testing.T) *ecdsa.PrivateKey { + t.Helper() + key, err := crypto.HexToECDSA(anvilKey0) + if err != nil { + t.Fatalf("parse test key: %v", err) + } + return key +} + +// goldenVoucher is the fixed voucher every pinned value derives from. +// Deadline 1893456000 = 2030-01-01T00:00:00Z. +func goldenVoucher(t *testing.T) (Permit2Voucher, *ecdsa.PrivateKey) { + t.Helper() + key := testKey(t) + return Permit2Voucher{ + Owner: crypto.PubkeyToAddress(key.PublicKey).Hex(), // 0xf39F...2266 + Token: "0x036CbD53842c5426634e7929541eC2318f3dCF7e", + Network: "base-sepolia", + Spender: testSpender.Hex(), + Nonce: "1", + Deadline: 1893456000, + Recipients: []BatchRecipient{ + {Address: "0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC", Amount: "1000"}, + {Address: "0x90F79bf6EB2c4f870365E785982E1f101E93b906", Amount: "2500"}, + }, + }, key +} + +func TestChainIDForNetwork(t *testing.T) { + for _, tc := range []struct { + network string + want int64 + }{ + {"base-sepolia", 84532}, + {"base", 8453}, + {"eip155:84532", 84532}, + {"EIP155:31337", 31337}, // arbitrary CAIP-2 ids pass through + {"ethereum", 1}, + } { + got, err := ChainIDForNetwork(tc.network) + if err != nil { + t.Fatalf("ChainIDForNetwork(%q): %v", tc.network, err) + } + if got.Int64() != tc.want { + t.Errorf("ChainIDForNetwork(%q) = %s, want %d", tc.network, got, tc.want) + } + } + if _, err := ChainIDForNetwork("not-a-chain"); err == nil { + t.Error("ChainIDForNetwork(not-a-chain) should fail") + } + if _, err := ChainIDForNetwork(""); err == nil { + t.Error("ChainIDForNetwork(empty) should fail") + } +} + +func TestSignVoucher_VerifyRoundTrip(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + chainID := big.NewInt(84532) + + if err := SignVoucher(&v, chainID, key); err != nil 
{ + t.Fatalf("SignVoucher: %v", err) + } + if v.Signature == "" || !strings.HasPrefix(v.Signature, "0x") || len(v.Signature) != 132 { + t.Fatalf("Signature = %q, want 65-byte 0x-hex", v.Signature) + } + if err := VerifyVoucher(v, chainID, testSpender); err != nil { + t.Fatalf("VerifyVoucher: %v", err) + } +} + +func TestSignVoucher_FillsOwnerAndRejectsMismatch(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + chainID := big.NewInt(84532) + + v.Owner = "" + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatalf("SignVoucher with empty owner: %v", err) + } + want := crypto.PubkeyToAddress(key.PublicKey) + if common.HexToAddress(v.Owner) != want { + t.Errorf("Owner = %s, want %s", v.Owner, want.Hex()) + } + + v.Owner = testSpender.Hex() // not the key's address + if err := SignVoucher(&v, chainID, key); err == nil { + t.Error("SignVoucher should reject owner/key mismatch") + } +} + +func TestVerifyVoucher_WrongSpender(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + chainID := big.NewInt(84532) + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + + other := crypto.PubkeyToAddress(testKey(t).PublicKey) // owner, not spender + if err := VerifyVoucher(v, chainID, other); err == nil || !strings.Contains(err.Error(), "spender") { + t.Fatalf("VerifyVoucher with wrong spender = %v, want spender binding error", err) + } + if err := VerifyVoucher(v, chainID, common.Address{}); err == nil { + t.Fatal("VerifyVoucher with zero expected spender should fail") + } +} + +func TestVerifyVoucher_Expired(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(-time.Minute).Unix() + chainID := big.NewInt(84532) + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + if err := VerifyVoucher(v, chainID, testSpender); err == nil || !strings.Contains(err.Error(), "expired") { + t.Fatalf("VerifyVoucher(expired) = %v, want expiry 
error", err) + } +} + +func TestVerifyVoucher_TamperedVoucherFails(t *testing.T) { + chainID := big.NewInt(84532) + + tamper := []struct { + name string + mutate func(*Permit2Voucher) + }{ + {"amount", func(v *Permit2Voucher) { v.Recipients[0].Amount = "999999" }}, + {"nonce", func(v *Permit2Voucher) { v.Nonce = "2" }}, + {"deadline", func(v *Permit2Voucher) { v.Deadline += 60 }}, + {"token", func(v *Permit2Voucher) { v.Token = "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913" }}, + } + for _, tc := range tamper { + t.Run(tc.name, func(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + tc.mutate(&v) + if err := VerifyVoucher(v, chainID, testSpender); err == nil { + t.Fatalf("VerifyVoucher should fail after tampering with %s", tc.name) + } + }) + } + + // Wrong chain id also breaks the signature (domain separator changes). + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + if err := VerifyVoucher(v, big.NewInt(8453), testSpender); err == nil { + t.Fatal("VerifyVoucher should fail on a different chain id") + } +} + +// TestVoucherRecipientAddressIsPolicyBoundNotSignatureBound documents a known +// property of standard (non-witness) Permit2 SignatureTransfer: the signature +// commits to the (token, amount) seats, spender, nonce, and deadline — NOT to +// recipient addresses, which live only in transferDetails at execution time. +// Recipient binding is therefore facilitator POLICY (capture pays only the +// stored voucher's seats, transported under the bearer-token reserve), not +// cryptography. Binding addresses into the signature would require the +// PermitBatchWitnessTransferFrom variant. 
+func TestVoucherRecipientAddressIsPolicyBoundNotSignatureBound(t *testing.T) { + chainID := big.NewInt(84532) + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + v.Recipients[1].Address = testSpender.Hex() // same amount, different payee + if err := VerifyVoucher(v, chainID, testSpender); err != nil { + t.Fatalf("recipient address is not part of the Permit2 digest; verify = %v", err) + } +} + +func TestVerifyVoucher_FieldValidation(t *testing.T) { + chainID := big.NewInt(84532) + cases := []struct { + name string + mutate func(*Permit2Voucher) + }{ + {"zero amount", func(v *Permit2Voucher) { v.Recipients[0].Amount = "0" }}, + {"negative amount", func(v *Permit2Voucher) { v.Recipients[0].Amount = "-5" }}, + {"non-numeric amount", func(v *Permit2Voucher) { v.Recipients[0].Amount = "1.5 USDC" }}, + {"no recipients", func(v *Permit2Voucher) { v.Recipients = nil }}, + {"bad owner", func(v *Permit2Voucher) { v.Owner = "owner" }}, + {"bad nonce", func(v *Permit2Voucher) { v.Nonce = "0xzz" }}, + {"zero deadline", func(v *Permit2Voucher) { v.Deadline = 0 }}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + v, key := goldenVoucher(t) + v.Deadline = time.Now().Add(time.Hour).Unix() + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + tc.mutate(&v) + if err := VerifyVoucher(v, chainID, testSpender); err == nil { + t.Fatalf("VerifyVoucher should reject %s", tc.name) + } + }) + } +} + +// TestHashVoucher_GoldenAndManualReconstruction pins the canonical digest and +// independently reconstructs it with raw keccak over Permit2's PermitHash +// semantics — proving the apitypes encoding matches the on-chain library +// (domain WITHOUT version, nested TokenPermissions[] array of struct hashes). 
+func TestHashVoucher_GoldenAndManualReconstruction(t *testing.T) {
+	v, _ := goldenVoucher(t)
+	chainID := big.NewInt(84532)
+
+	got, err := HashVoucher(v, chainID)
+	if err != nil {
+		t.Fatalf("HashVoucher: %v", err)
+	}
+	const golden = "0x352592eb204c815305c91afb79b1136fe4714297bd5cbb0c6ed3fe75fa8e6a75"
+	if got.Hex() != golden {
+		t.Errorf("HashVoucher = %s, want pinned %s", got.Hex(), golden)
+	}
+
+	// Manual reconstruction, mirroring permit2/src/libraries/PermitHash.sol.
+	// pad left-pads a value to one 32-byte word.
+	pad := func(b []byte) []byte { return common.LeftPadBytes(b, 32) }
+	domainTypeHash := crypto.Keccak256([]byte("EIP712Domain(string name,uint256 chainId,address verifyingContract)"))
+	domainSep := crypto.Keccak256(
+		domainTypeHash,
+		crypto.Keccak256([]byte("Permit2")),
+		pad(chainID.Bytes()),
+		pad(common.HexToAddress(Permit2Address).Bytes()),
+	)
+
+	// One TokenPermissions struct hash per recipient seat, concatenated and
+	// hashed again below to form the array hash.
+	tokenPermTypeHash := crypto.Keccak256([]byte("TokenPermissions(address token,uint256 amount)"))
+	var permHashes []byte
+	for _, r := range v.Recipients {
+		amount, ok := new(big.Int).SetString(r.Amount, 10)
+		if !ok {
+			t.Fatalf("amount %q", r.Amount)
+		}
+		permHashes = append(permHashes, crypto.Keccak256(
+			tokenPermTypeHash,
+			pad(common.HexToAddress(v.Token).Bytes()),
+			pad(amount.Bytes()),
+		)...)
+	}
+	permittedHash := crypto.Keccak256(permHashes)
+
+	batchTypeHash := crypto.Keccak256([]byte(
+		"PermitBatchTransferFrom(TokenPermissions[] permitted,address spender,uint256 nonce,uint256 deadline)TokenPermissions(address token,uint256 amount)",
+	))
+	nonce, _ := new(big.Int).SetString(v.Nonce, 10)
+	structHash := crypto.Keccak256(
+		batchTypeHash,
+		permittedHash,
+		pad(common.HexToAddress(v.Spender).Bytes()),
+		pad(nonce.Bytes()),
+		pad(big.NewInt(v.Deadline).Bytes()),
+	)
+
+	// Digest = keccak256(0x19 0x01 || domainSeparator || structHash).
+	manual := crypto.Keccak256([]byte("\x19\x01"), domainSep, structHash)
+	if common.BytesToHash(manual) != got {
+		t.Errorf("manual PermitHash reconstruction %x != HashVoucher %s", manual, got.Hex())
+	}
+}
+
+// TestVoucherTypedData_RemotePayloadShape checks the payload returned by
+// VoucherTypedData: the primary type of both results, a version-less Permit2
+// domain with a decimal-string chainId, one permitted entry per recipient,
+// and an error for a nil chain id.
+func TestVoucherTypedData_RemotePayloadShape(t *testing.T) {
+	v, _ := goldenVoucher(t)
+	chainID := big.NewInt(84532)
+
+	typed, remote, err := VoucherTypedData(v, chainID)
+	if err != nil {
+		t.Fatalf("VoucherTypedData: %v", err)
+	}
+	if typed.PrimaryType != "PermitBatchTransferFrom" || remote.PrimaryType != "PermitBatchTransferFrom" {
+		t.Errorf("primary types = %q / %q", typed.PrimaryType, remote.PrimaryType)
+	}
+
+	// Permit2's domain has NO version field.
+	if _, ok := remote.Domain["version"]; ok {
+		t.Error("remote domain must not carry a version field (Permit2 omits it)")
+	}
+	for _, f := range remote.Types["EIP712Domain"] {
+		if f.Name == "version" {
+			t.Error("EIP712Domain type must not declare version")
+		}
+	}
+	if remote.Domain["name"] != "Permit2" {
+		t.Errorf("domain name = %v", remote.Domain["name"])
+	}
+	if remote.Domain["chainId"] != "84532" {
+		t.Errorf("domain chainId = %v, want decimal string", remote.Domain["chainId"])
+	}
+	if remote.Domain["verifyingContract"] != Permit2Address {
+		t.Errorf("verifyingContract = %v", remote.Domain["verifyingContract"])
+	}
+
+	permitted, ok := remote.Message["permitted"].([]interface{})
+	if !ok || len(permitted) != len(v.Recipients) {
+		t.Fatalf("permitted = %#v, want one entry per recipient seat", remote.Message["permitted"])
+	}
+
+	if _, _, err := VoucherTypedData(v, nil); err == nil {
+		t.Error("VoucherTypedData(nil chainID) should fail")
+	}
+}
diff --git a/internal/x402/escrow/server.go b/internal/x402/escrow/server.go
new file mode 100644
index 00000000..dc6b5488
--- /dev/null
+++ b/internal/x402/escrow/server.go
@@ -0,0 +1,382 @@
+package escrow
+
+import (
+	"crypto/subtle"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math/big"
+	"net/http"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// idPattern bounds escrow ids to safe filenames (the store keys files by id).
+// ServiceBounty UIDs (RFC-4122) always match.
+var idPattern = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]{0,127}$`)
+
+// ServerOptions configures the escrow facilitator HTTP surface.
+type ServerOptions struct {
+	// Token is the bearer credential required on every /escrow/{op} POST.
+	// Empty disables auth, mirroring HTTPGateway, which omits the
+	// Authorization header entirely when its Token is empty.
+	Token string
+	// Spender is the facilitator's settlement address — the only executor
+	// vouchers may name. Zero means no signing identity is configured:
+	// voucher-less reserves still work (with an empty spender hint) but
+	// voucher verification and capture are refused.
+	Spender common.Address
+	// Networks are the chain aliases this facilitator settles on, surfaced
+	// via GET /escrow/info. When non-empty, vouchers on other chains are
+	// rejected at reserve time.
+	Networks []string
+	// Submitter executes captures on-chain. Nil disables capture (503) while
+	// reserve/void keep working.
+	Submitter Submitter
+}
+
+// Server implements the facilitator side of the escrow wire protocol that
+// HTTPGateway speaks: POST /escrow/{reserve,capture,void}/{id} plus
+// GET /escrow/info and GET /healthz. The server holds no nonce-invalidation
+// logic: Void is store-only, because the voucher deadline is the hard
+// on-chain guarantee (v1 — no invalidateUnorderedNonces call).
+type Server struct {
+	store    *Store
+	token    string
+	spender  common.Address
+	networks []string
+	// networkIDs holds the chain ids resolved from networks by NewServer.
+	networkIDs []*big.Int
+	submitter  Submitter
+
+	// mu guards locks; locks holds one mutex per escrow id (see lockID).
+	mu    sync.Mutex
+	locks map[string]*sync.Mutex
+}
+
+// NewServer builds a Server over a Store. Each alias in opts.Networks is
+// resolved with ChainIDForNetwork; aliases that fail to resolve are skipped
+// without an error and contribute no entry to networkIDs.
+func NewServer(store *Store, opts ServerOptions) *Server {
+	s := &Server{
+		store:     store,
+		token:     opts.Token,
+		spender:   opts.Spender,
+		networks:  opts.Networks,
+		submitter: opts.Submitter,
+		locks:     make(map[string]*sync.Mutex),
+	}
+	for _, n := range opts.Networks {
+		if id, err := ChainIDForNetwork(n); err == nil {
+			s.networkIDs = append(s.networkIDs, id)
+		}
+	}
+	return s
+}
+
+// Handler returns the HTTP mux for the escrow surface.
+func (s *Server) Handler() http.Handler {
+	mux := http.NewServeMux()
+	// The three mutating operations sit behind the bearer-token wrapper.
+	mux.HandleFunc("POST /escrow/reserve/{id}", s.auth(s.handleReserve))
+	mux.HandleFunc("POST /escrow/capture/{id}", s.auth(s.handleCapture))
+	mux.HandleFunc("POST /escrow/void/{id}", s.auth(s.handleVoid))
+	// Discovery and health are registered without auth.
+	mux.HandleFunc("GET /escrow/info", s.handleInfo)
+	mux.HandleFunc("GET /healthz", s.handleHealthz)
+	return mux
+}
+
+// auth enforces the bearer token with a constant-time comparison. An empty
+// configured token disables auth (the HTTPGateway client omits the
+// Authorization header when its token is empty, so this is the symmetric
+// choice for local-first dev; production must set OBOL_ESCROW_TOKEN).
+//
+// The Authorization header must carry the "Bearer " scheme: a header that
+// holds only the raw token, or any other scheme, is answered with 401.
+func (s *Server) auth(next http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if s.token == "" {
+			next(w, r)
+			return
+		}
+		// CutPrefix reports whether the scheme was present. TrimPrefix
+		// returned the header unchanged when it was not, which let a bare
+		// "Authorization: <token>" header authenticate.
+		got, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
+		if !ok || subtle.ConstantTimeCompare([]byte(got), []byte(s.token)) != 1 {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		next(w, r)
+	}
+}
+
+// lockID serializes operations per escrow id (captures can take minutes on
+// chain; a global lock would head-of-line-block every other escrow).
+func (s *Server) lockID(id string) func() {
+	// The per-id mutex is created on first use under s.mu. s.mu is released
+	// before blocking on m, so waiting for one escrow never stalls lookups
+	// for other ids.
+	// NOTE(review): nothing in this function removes entries from s.locks.
+	s.mu.Lock()
+	m, ok := s.locks[id]
+	if !ok {
+		m = &sync.Mutex{}
+		s.locks[id] = m
+	}
+	s.mu.Unlock()
+	m.Lock()
+	return m.Unlock
+}
+
+// spenderHex returns the configured spender as a hex string, or "" when the
+// spender is the zero address (no signing identity configured).
+func (s *Server) spenderHex() string {
+	if s.spender == (common.Address{}) {
+		return ""
+	}
+	return s.spender.Hex()
+}
+
+// writeJSON sets the JSON content type and encodes v as the response body.
+// The encode error is dropped on purpose: by then the response has started
+// and there is no way left to report it to the client.
+func writeJSON(w http.ResponseWriter, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(v)
+}
+
+// pathID extracts the {id} path value and checks it against idPattern. On a
+// mismatch it writes a 400 and returns ok=false; the caller must just return.
+func pathID(w http.ResponseWriter, r *http.Request) (string, bool) {
+	id := r.PathValue("id")
+	if !idPattern.MatchString(id) {
+		http.Error(w, "invalid escrow id", http.StatusBadRequest)
+		return "", false
+	}
+	return id, true
+}
+
+// networkAllowed checks the voucher's chain against the configured networks.
+// With no networks configured every chain is accepted. The allow-all branch
+// keys off the configured aliases, not the resolved ids: NewServer skips
+// aliases it cannot resolve, so a non-empty Networks list that resolved to
+// nothing must reject every chain rather than silently accept all of them.
+func (s *Server) networkAllowed(chainID *big.Int) bool {
+	if len(s.networks) == 0 {
+		return true
+	}
+	for _, id := range s.networkIDs {
+		if id.Cmp(chainID) == 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// handleReserve creates or updates the escrow entry for the path id.
+// Without a voucher it stores AwaitingVoucher and returns the spender hint.
+// With one it requires a configured spender, a served network that agrees
+// with the reserve leg, and a voucher that verifies, then stores Reserved.
+// Captured entries, and voucher-less requests against a Reserved entry,
+// return the stored receipt unchanged.
+func (s *Server) handleReserve(w http.ResponseWriter, r *http.Request) {
+	id, ok := pathID(w, r)
+	if !ok {
+		return
+	}
+
+	var req ReserveRequest
+	if err := json.NewDecoder(io.LimitReader(r.Body, 1<<20)).Decode(&req); err != nil {
+		http.Error(w, "decode reserve request: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+	if req.ID != "" && req.ID != id {
+		http.Error(w, fmt.Sprintf("body id %q does not match path id %q", req.ID, id), http.StatusBadRequest)
+		return
+	}
+	req.ID = id
+
+	unlock := s.lockID(id)
+	defer unlock()
+
+	entry, exists, err := s.store.Get(id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	// Already settled: idempotent success with the stored receipt.
+	if exists && entry.State == StateCaptured {
+		writeJSON(w, entry.Receipt)
+		return
+	}
+	// A voucher-less re-reserve never downgrades a held voucher.
+	if exists && entry.State == StateReserved && req.Voucher == nil {
+		writeJSON(w, entry.Receipt)
+		return
+	}
+
+	var receipt Receipt
+	if req.Voucher == nil {
+		receipt = Receipt{State: StateAwaitingVoucher, Spender: s.spenderHex()}
+	} else {
+		if s.spender == (common.Address{}) {
+			http.Error(w, "facilitator signing address not configured; cannot verify voucher spender binding", http.StatusServiceUnavailable)
+			return
+		}
+		chainID, err := ChainIDForNetwork(req.Voucher.Network)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		if !s.networkAllowed(chainID) {
+			http.Error(w, fmt.Sprintf("voucher network %q is not served by this facilitator (networks: %s)", req.Voucher.Network, strings.Join(s.networks, ", ")), http.StatusBadRequest)
+			return
+		}
+		// Guard alias drift between the reserve leg and the voucher. A
+		// reserve-leg alias that does not resolve is ignored here.
+		if req.Network != "" {
+			if reqChain, err := ChainIDForNetwork(req.Network); err == nil && reqChain.Cmp(chainID) != 0 {
+				http.Error(w, fmt.Sprintf("reserve network %q and voucher network %q resolve to different chains", req.Network, req.Voucher.Network), http.StatusBadRequest)
+				return
+			}
+		}
+		if err := VerifyVoucher(*req.Voucher, chainID, s.spender); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		receipt = Receipt{State: StateReserved, Spender: s.spenderHex()}
+	}
+
+	if err := s.store.Put(Entry{ID: id, State: receipt.State, Request: &req, Receipt: receipt, UpdatedAt: time.Now().UTC()}); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	writeJSON(w, receipt)
+}
+
+// captureRequest is the optional capture body. HTTPGateway.Capture sends no
+// body (capture every voucher seat); HTTPGateway.CaptureBatch sends
+// {"recipients":[...]} (capture the subset, omitted seats unpaid).
+type captureRequest struct {
+	// Recipients is nil when the field is absent or null (capture every
+	// seat); a present but empty array is rejected by handleCapture.
+	Recipients []BatchRecipient `json:"recipients"`
+}
+
+// handleCapture settles a reserved escrow. The optional JSON body selects a
+// subset of recipients; an empty or whitespace-only body, or one whose
+// "recipients" is absent or null, captures every seat of the stored voucher.
+// A Captured escrow returns its stored receipt without calling the submitter
+// again; Voided and AwaitingVoucher escrows answer 409.
+func (s *Server) handleCapture(w http.ResponseWriter, r *http.Request) {
+	id, ok := pathID(w, r)
+	if !ok {
+		return
+	}
+
+	// The body is read (capped at 1 MiB) before the per-id lock is taken.
+	raw, err := io.ReadAll(io.LimitReader(r.Body, 1<<20))
+	if err != nil {
+		http.Error(w, "read capture request: "+err.Error(), http.StatusBadRequest)
+		return
+	}
+	var req captureRequest
+	if body := strings.TrimSpace(string(raw)); body != "" {
+		if err := json.Unmarshal(raw, &req); err != nil {
+			http.Error(w, "decode capture request: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+		// Non-nil but empty means the caller sent "recipients": [].
+		if req.Recipients != nil && len(req.Recipients) == 0 {
+			http.Error(w, "capture requested an explicitly empty recipient set", http.StatusBadRequest)
+			return
+		}
+	}
+
+	unlock := s.lockID(id)
+	defer unlock()
+
+	entry, exists, err := s.store.Get(id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if !exists {
+		http.Error(w, "no such escrow", http.StatusNotFound)
+		return
+	}
+
+	switch entry.State {
+	case StateCaptured:
+		// Idempotent: the stored receipt, no second settlement.
+		writeJSON(w, entry.Receipt)
+		return
+	case StateVoided:
+		http.Error(w, "escrow already voided", http.StatusConflict)
+		return
+	case StateAwaitingVoucher:
+		http.Error(w, "no voucher attached; re-reserve with a signed voucher first", http.StatusConflict)
+		return
+	case StateReserved:
+		// fall through to settle
+	default:
+		http.Error(w, "unknown escrow state "+entry.State, http.StatusInternalServerError)
+		return
+	}
+	if entry.Request == nil || entry.Request.Voucher == nil {
+		http.Error(w, "reserved escrow has no voucher", http.StatusConflict)
+		return
+	}
+	if s.submitter == nil {
+		http.Error(w, "settlement unavailable: facilitator has no signing key (set OBOL_ESCROW_KEY or OBOL_ESCROW_SIGNER_URL)", http.StatusServiceUnavailable)
+		return
+	}
+
+	voucher := *entry.Request.Voucher
+	requested := req.Recipients
+	if requested == nil {
+		// No subset given: request every seat the voucher carries.
+		requested = voucher.Recipients
+	}
+	details, err := BuildTransferDetails(voucher, requested)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	// Submit runs under the request context and while the per-id lock is held.
+	txHash, err := s.submitter.Submit(r.Context(), voucher, details)
+	if err != nil {
+		http.Error(w, "settlement failed: "+err.Error(), http.StatusBadGateway)
+		return
+	}
+
+	receipt := Receipt{State: StateCaptured, TxHash: txHash, Spender: s.spenderHex()}
+	entry.State = StateCaptured
+	entry.Receipt = receipt
+	entry.UpdatedAt = time.Now().UTC()
+	if err := s.store.Put(entry); err != nil {
+		// Submit returned a tx hash but the Captured entry could not be
+		// written. The hash is reported in this error body only.
+		// NOTE(review): the store presumably still holds the Reserved entry,
+		// so a repeat capture would reach Submit again instead of taking the
+		// idempotent StateCaptured branch — confirm Store.Put failure
+		// semantics.
+		http.Error(w, fmt.Sprintf("settled in %s but failed to persist receipt: %v", txHash, err), http.StatusInternalServerError)
+		return
+	}
+	writeJSON(w, receipt)
+}
+
+// handleVoid marks an escrow Voided in the store. Unknown ids answer with a
+// Voided receipt and write nothing; a Captured escrow answers 409. Every
+// other stored state, including an already Voided one, is overwritten.
+func (s *Server) handleVoid(w http.ResponseWriter, r *http.Request) {
+	id, ok := pathID(w, r)
+	if !ok {
+		return
+	}
+
+	unlock := s.lockID(id)
+	defer unlock()
+
+	entry, exists, err := s.store.Get(id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if !exists {
+		// Re-runnable refunds: voiding the unknown is a no-op success
+		// (mirrors LedgerGateway). Nothing is persisted for unknown ids.
+		writeJSON(w, Receipt{State: StateVoided})
+		return
+	}
+	if entry.State == StateCaptured {
+		http.Error(w, "escrow already captured", http.StatusConflict)
+		return
+	}
+
+	receipt := Receipt{State: StateVoided, Spender: s.spenderHex()}
+	entry.State = StateVoided
+	entry.Receipt = receipt
+	entry.UpdatedAt = time.Now().UTC()
+	if err := s.store.Put(entry); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	writeJSON(w, receipt)
+}
+
+// infoResponse is the GET /escrow/info body: the facilitator settlement
+// address signers must bind as the voucher spender, and the chains served.
+type infoResponse struct {
+	Address  string   `json:"address"`
+	Networks []string `json:"networks"`
+}
+
+// handleInfo answers GET /escrow/info with the spender address and the
+// configured network aliases. A nil alias list is replaced by an empty one so
+// the field encodes as [] rather than null.
+func (s *Server) handleInfo(w http.ResponseWriter, _ *http.Request) {
+	resp := infoResponse{Address: s.spenderHex(), Networks: s.networks}
+	if resp.Networks == nil {
+		resp.Networks = []string{}
+	}
+	writeJSON(w, resp)
+}
+
+// handleHealthz answers GET /healthz with 200 and the body "ok".
+func (s *Server) handleHealthz(w http.ResponseWriter, _ *http.Request) {
+	w.WriteHeader(http.StatusOK)
+	_, _ = io.WriteString(w, "ok")
+}
diff --git a/internal/x402/escrow/server_test.go b/internal/x402/escrow/server_test.go
new file mode 100644
index 00000000..80348366
--- /dev/null
+++ b/internal/x402/escrow/server_test.go
@@ -0,0 +1,416 @@
+package escrow
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"math/big"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// fakeSubmitter records submissions and returns a canned tx hash.
+type fakeSubmitter struct {
+	calls   atomic.Int64
+	voucher Permit2Voucher
+	details []TransferDetail
+	err     error
+}
+
+// Submit counts the call and remembers its arguments, then returns either the
+// canned hash or, when f.err is set, that error with an empty hash.
+func (f *fakeSubmitter) Submit(_ context.Context, v Permit2Voucher, details []TransferDetail) (string, error) {
+	f.calls.Add(1)
+	f.voucher, f.details = v, details
+	if f.err == nil {
+		return "0xfeedface", nil
+	}
+	return "", f.err
+}
+
+// newTestServer wires a Server over a temp-dir store and returns an
+// HTTPGateway client pointed at it — proving the server speaks exactly the
+// wire shape the gateway client expects.
+func newTestServer(t *testing.T, opts ServerOptions) (*httptest.Server, *HTTPGateway, *Store) {
+	t.Helper()
+	store, err := NewStore(t.TempDir())
+	if err != nil {
+		t.Fatal(err)
+	}
+	return newTestServerWithStore(t, store, opts)
+}
+
+// newTestServerWithStore is newTestServer over a caller-supplied store, so a
+// test can start a second server on the same state. The httptest server is
+// closed via t.Cleanup.
+func newTestServerWithStore(t *testing.T, store *Store, opts ServerOptions) (*httptest.Server, *HTTPGateway, *Store) {
+	t.Helper()
+	srv := httptest.NewServer(NewServer(store, opts).Handler())
+	t.Cleanup(srv.Close)
+	g := &HTTPGateway{Base: srv.URL, Token: opts.Token, Client: srv.Client()}
+	return srv, g, store
+}
+
+// signedTestVoucher returns a voucher bound to testSpender, expiring in 1h.
+func signedTestVoucher(t *testing.T) Permit2Voucher {
+	t.Helper()
+	v, key := goldenVoucher(t)
+	v.Deadline = time.Now().Add(time.Hour).Unix()
+	if err := SignVoucher(&v, big.NewInt(84532), key); err != nil {
+		t.Fatal(err)
+	}
+	return v
+}
+
+// TestServer_ReserveAwaitingThenReserved walks one escrow id through a
+// voucher-less reserve, a reserve with a voucher, and a second voucher-less
+// reserve.
+func TestServer_ReserveAwaitingThenReserved(t *testing.T) {
+	sub := &fakeSubmitter{}
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base", "base-sepolia"}, Submitter: sub})
+	ctx := context.Background()
+
+	// 1. Reserve without a voucher: AwaitingVoucher + the spender to bind.
+	r, err := g.Reserve(ctx, ReserveRequest{ID: "b1", Network: "base-sepolia", Amount: "3500"})
+	if err != nil {
+		t.Fatalf("Reserve: %v", err)
+	}
+	if r.State != StateAwaitingVoucher {
+		t.Fatalf("state = %q, want AwaitingVoucher", r.State)
+	}
+	if r.Spender != testSpender.Hex() {
+		t.Fatalf("spender hint = %q, want %s", r.Spender, testSpender.Hex())
+	}
+
+	// 2. Re-reserve with a voucher binding that spender: Reserved.
+	v := signedTestVoucher(t)
+	r2, err := g.Reserve(ctx, ReserveRequest{ID: "b1", Network: "base-sepolia", Voucher: &v})
+	if err != nil {
+		t.Fatalf("re-Reserve with voucher: %v", err)
+	}
+	if r2.State != StateReserved || r2.Spender != testSpender.Hex() {
+		t.Fatalf("receipt = %+v, want Reserved", r2)
+	}
+
+	// 3. A later voucher-less re-reserve must NOT downgrade the held voucher.
+	r3, err := g.Reserve(ctx, ReserveRequest{ID: "b1", Network: "base-sepolia"})
+	if err != nil {
+		t.Fatalf("voucher-less re-Reserve: %v", err)
+	}
+	if r3.State != StateReserved {
+		t.Fatalf("voucher-less re-reserve downgraded state to %q", r3.State)
+	}
+}
+
+// TestServer_ReserveRejectsBadVouchers sends four vouchers the server must
+// refuse at reserve time and matches on text in the returned error.
+func TestServer_ReserveRejectsBadVouchers(t *testing.T) {
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base-sepolia"}, Submitter: &fakeSubmitter{}})
+	ctx := context.Background()
+
+	// Wrong spender binding.
+	v, key := goldenVoucher(t)
+	v.Deadline = time.Now().Add(time.Hour).Unix()
+	v.Spender = "0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC"
+	if err := SignVoucher(&v, big.NewInt(84532), key); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "bad1", Voucher: &v}); err == nil || !strings.Contains(err.Error(), "400") {
+		t.Errorf("wrong-spender reserve = %v, want 400", err)
+	}
+
+	// Tampered amount.
+	v2 := signedTestVoucher(t)
+	v2.Recipients[0].Amount = "999999"
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "bad2", Voucher: &v2}); err == nil || !strings.Contains(err.Error(), "400") {
+		t.Errorf("tampered reserve = %v, want 400", err)
+	}
+
+	// Network not served by this facilitator.
+	v3, key3 := goldenVoucher(t)
+	v3.Deadline = time.Now().Add(time.Hour).Unix()
+	v3.Network = "base"
+	if err := SignVoucher(&v3, big.NewInt(8453), key3); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "bad3", Voucher: &v3}); err == nil || !strings.Contains(err.Error(), "not served") {
+		t.Errorf("off-network reserve = %v, want network rejection", err)
+	}
+
+	// Reserve leg / voucher network drift.
+	v4 := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "bad4", Network: "base", Voucher: &v4}); err == nil || !strings.Contains(err.Error(), "different chains") {
+		t.Errorf("network-drift reserve = %v, want mismatch error", err)
+	}
+}
+
+// TestServer_CaptureBatchHappyPathAndIdempotency captures one of two seats,
+// then checks that a repeat capture and a repeat reserve both return the
+// stored receipt without a second submission.
+func TestServer_CaptureBatchHappyPathAndIdempotency(t *testing.T) {
+	sub := &fakeSubmitter{}
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base-sepolia"}, Submitter: sub})
+	ctx := context.Background()
+
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b2", Voucher: &v}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Capture a subset: seat 0 only; seat 1 must ride along index-wise at 0.
+	r, err := g.CaptureBatch(ctx, "b2", []BatchRecipient{{Address: v.Recipients[0].Address, Amount: "1000"}})
+	if err != nil {
+		t.Fatalf("CaptureBatch: %v", err)
+	}
+	if r.State != StateCaptured || r.TxHash != "0xfeedface" {
+		t.Fatalf("receipt = %+v", r)
+	}
+	if got := sub.calls.Load(); got != 1 {
+		t.Fatalf("submitter calls = %d, want 1", got)
+	}
+	if len(sub.details) != 2 || sub.details[0].Amount.Cmp(big.NewInt(1000)) != 0 || sub.details[1].Amount.Sign() != 0 {
+		t.Fatalf("submitted details = %+v, want index-wise [1000, 0]", sub.details)
+	}
+
+	// Idempotent: a second capture returns the stored receipt, no re-settle.
+	r2, err := g.Capture(ctx, "b2")
+	if err != nil {
+		t.Fatalf("re-Capture: %v", err)
+	}
+	if r2.TxHash != "0xfeedface" || sub.calls.Load() != 1 {
+		t.Fatalf("re-capture = %+v after %d submits, want stored receipt and 1 submit", r2, sub.calls.Load())
+	}
+
+	// Re-reserve after capture returns the captured receipt (settled is settled).
+	r3, err := g.Reserve(ctx, ReserveRequest{ID: "b2", Voucher: &v})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if r3.State != StateCaptured {
+		t.Fatalf("re-reserve after capture = %+v", r3)
+	}
+}
+
+// TestServer_CaptureFullVoucherWhenNoBody checks that a body-less capture
+// submits both seats at their signed amounts.
+func TestServer_CaptureFullVoucherWhenNoBody(t *testing.T) {
+	sub := &fakeSubmitter{}
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Submitter: sub})
+	ctx := context.Background()
+
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b3", Voucher: &v}); err != nil {
+		t.Fatal(err)
+	}
+	// Plain Capture (no body) settles every voucher seat.
+	if _, err := g.Capture(ctx, "b3"); err != nil {
+		t.Fatalf("Capture: %v", err)
+	}
+	if len(sub.details) != 2 || sub.details[0].Amount.Cmp(big.NewInt(1000)) != 0 || sub.details[1].Amount.Cmp(big.NewInt(2500)) != 0 {
+		t.Fatalf("details = %+v, want full [1000, 2500]", sub.details)
+	}
+}
+
+// TestServer_CaptureGuards covers the capture refusals in sequence. The steps
+// share one server and one fakeSubmitter, so their order matters.
+func TestServer_CaptureGuards(t *testing.T) {
+	sub := &fakeSubmitter{}
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Submitter: sub})
+	ctx := context.Background()
+
+	// Unknown escrow.
+	if _, err := g.Capture(ctx, "ghost"); err == nil || !strings.Contains(err.Error(), "404") {
+		t.Errorf("capture unknown = %v, want 404", err)
+	}
+
+	// AwaitingVoucher (no voucher attached) cannot capture.
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b4"}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.Capture(ctx, "b4"); err == nil || !strings.Contains(err.Error(), "409") {
+		t.Errorf("capture awaiting = %v, want 409", err)
+	}
+
+	// Subset rule: amount mismatch is rejected before any submission.
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b5", Voucher: &v}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.CaptureBatch(ctx, "b5", []BatchRecipient{{Address: v.Recipients[0].Address, Amount: "999"}}); err == nil || !strings.Contains(err.Error(), "400") {
+		t.Errorf("amount-mismatch capture = %v, want 400", err)
+	}
+	if _, err := g.CaptureBatch(ctx, "b5", []BatchRecipient{{Address: testSpender.Hex(), Amount: "1000"}}); err == nil || !strings.Contains(err.Error(), "400") {
+		t.Errorf("unknown-recipient capture = %v, want 400", err)
+	}
+	if sub.calls.Load() != 0 {
+		t.Fatalf("submitter was called %d times for rejected captures", sub.calls.Load())
+	}
+
+	// Voided escrow cannot capture.
+	if _, err := g.Void(ctx, "b5"); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.Capture(ctx, "b5"); err == nil || !strings.Contains(err.Error(), "409") {
+		t.Errorf("capture voided = %v, want 409", err)
+	}
+
+	// Settlement failure surfaces as 502 and leaves the escrow Reserved.
+	v6 := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b6", Voucher: &v6}); err != nil {
+		t.Fatal(err)
+	}
+	sub.err = fmt.Errorf("rpc down")
+	if _, err := g.Capture(ctx, "b6"); err == nil || !strings.Contains(err.Error(), "502") {
+		t.Errorf("failed settle = %v, want 502", err)
+	}
+	sub.err = nil
+	if _, err := g.Capture(ctx, "b6"); err != nil {
+		t.Errorf("retry after settle failure: %v", err)
+	}
+}
+
+// TestServer_CaptureWithoutSubmitter503 checks that capture is refused with a
+// 503 naming OBOL_ESCROW_KEY when no Submitter is configured.
+func TestServer_CaptureWithoutSubmitter503(t *testing.T) {
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Submitter: nil})
+	ctx := context.Background()
+
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b7", Voucher: &v}); err != nil {
+		t.Fatal(err)
+	}
+	_, err := g.Capture(ctx, "b7")
+	if err == nil || !strings.Contains(err.Error(), "503") || !strings.Contains(err.Error(), "OBOL_ESCROW_KEY") {
+		t.Fatalf("capture without submitter = %v, want 503 naming OBOL_ESCROW_KEY", err)
+	}
+	// Reserve and void still work without a submitter.
+	if _, err := g.Void(ctx, "b7"); err != nil {
+		t.Fatalf("void without submitter: %v", err)
+	}
+}
+
+// TestServer_ReserveVoucherWithoutSpenderConfigured runs against a server
+// with no Spender set.
+func TestServer_ReserveVoucherWithoutSpenderConfigured(t *testing.T) {
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret"})
+	ctx := context.Background()
+
+	// Voucher-less reserve still answers (empty spender hint).
+	r, err := g.Reserve(ctx, ReserveRequest{ID: "b8"})
+	if err != nil || r.State != StateAwaitingVoucher || r.Spender != "" {
+		t.Fatalf("reserve = %+v, %v", r, err)
+	}
+	// Voucher verification is refused: nothing to bind the spender to.
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b8", Voucher: &v}); err == nil || !strings.Contains(err.Error(), "503") {
+		t.Fatalf("voucher reserve without spender = %v, want 503", err)
+	}
+}
+
+// TestServer_VoidIdempotent voids an unknown id, then voids a known id twice.
+func TestServer_VoidIdempotent(t *testing.T) {
+	_, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender})
+	ctx := context.Background()
+
+	// Voiding the unknown is a no-op success (refunds re-runnable).
+	if r, err := g.Void(ctx, "ghost"); err != nil || r.State != StateVoided {
+		t.Fatalf("void unknown = %+v, %v", r, err)
+	}
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "b9"}); err != nil {
+		t.Fatal(err)
+	}
+	if r, err := g.Void(ctx, "b9"); err != nil || r.State != StateVoided {
+		t.Fatalf("void = %+v, %v", r, err)
+	}
+	if r, err := g.Void(ctx, "b9"); err != nil || r.State != StateVoided {
+		t.Fatalf("re-void = %+v, %v", r, err)
+	}
+}
+
+// TestServer_BearerAuth checks that a wrong or missing client token yields
+// 401 and that a server with an empty token accepts unauthenticated calls.
+func TestServer_BearerAuth(t *testing.T) {
+	srv, _, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender})
+	ctx := context.Background()
+
+	wrong := &HTTPGateway{Base: srv.URL, Token: "wrong", Client: srv.Client()}
+	if _, err := wrong.Reserve(ctx, ReserveRequest{ID: "a1"}); err == nil || !strings.Contains(err.Error(), "401") {
+		t.Errorf("wrong token = %v, want 401", err)
+	}
+	missing := &HTTPGateway{Base: srv.URL, Client: srv.Client()}
+	if _, err := missing.Void(ctx, "a1"); err == nil || !strings.Contains(err.Error(), "401") {
+		t.Errorf("missing token = %v, want 401", err)
+	}
+
+	// Empty server token disables auth — symmetric with the client omitting
+	// the Authorization header when its Token is empty.
+	srv2, g2, _ := newTestServer(t, ServerOptions{Token: "", Spender: testSpender})
+	_ = srv2
+	if _, err := g2.Reserve(ctx, ReserveRequest{ID: "a2"}); err != nil {
+		t.Errorf("unauthenticated reserve on tokenless server: %v", err)
+	}
+}
+
+// TestServer_PersistenceAcrossRestart captures on one server, then captures
+// the same id on a second server built over the same store.
+func TestServer_PersistenceAcrossRestart(t *testing.T) {
+	store, err := NewStore(t.TempDir())
+	if err != nil {
+		t.Fatal(err)
+	}
+	sub := &fakeSubmitter{}
+	_, g, _ := newTestServerWithStore(t, store, ServerOptions{Token: "secret", Spender: testSpender, Submitter: sub})
+	ctx := context.Background()
+
+	v := signedTestVoucher(t)
+	if _, err := g.Reserve(ctx, ReserveRequest{ID: "p1", Voucher: &v}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := g.Capture(ctx, "p1"); err != nil {
+		t.Fatal(err)
+	}
+
+	// "Restart": a new server over the same state dir, with a submitter that
+	// would fail if called — the stored receipt must be returned instead.
+	sub2 := &fakeSubmitter{err: fmt.Errorf("must not settle twice")}
+	_, g2, _ := newTestServerWithStore(t, store, ServerOptions{Token: "secret", Spender: testSpender, Submitter: sub2})
+	r, err := g2.Capture(ctx, "p1")
+	if err != nil {
+		t.Fatalf("capture after restart: %v", err)
+	}
+	if r.State != StateCaptured || r.TxHash != "0xfeedface" {
+		t.Fatalf("receipt after restart = %+v", r)
+	}
+	if sub2.calls.Load() != 0 {
+		t.Fatal("restart capture re-submitted an already-captured escrow")
+	}
+}
+
+// TestServer_InfoAndHealthz fetches both GET endpoints with a plain HTTP
+// client that sends no Authorization header.
+func TestServer_InfoAndHealthz(t *testing.T) {
+	srv, _, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base", "base-sepolia"}})
+
+	// /escrow/info is unauthenticated discovery: signers need the spender
+	// address before they hold any credential.
+	resp, err := srv.Client().Get(srv.URL + "/escrow/info")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("info status = %d", resp.StatusCode)
+	}
+	var info struct {
+		Address  string   `json:"address"`
+		Networks []string `json:"networks"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
+		t.Fatal(err)
+	}
+	if info.Address != testSpender.Hex() {
+		t.Errorf("info address = %q", info.Address)
+	}
+	if len(info.Networks) != 2 || info.Networks[0] != "base" {
+		t.Errorf("info networks = %v", info.Networks)
+	}
+
+	hz, err := srv.Client().Get(srv.URL + "/healthz")
+	if err != nil {
+		t.Fatal(err)
+	}
+	hz.Body.Close()
+	if hz.StatusCode != http.StatusOK {
+		t.Errorf("healthz status = %d", hz.StatusCode)
+	}
+}
+
+// TestServer_RejectsUnsafeIDs posts to a void path whose id contains an
+// escaped space and expects 400.
+func TestServer_RejectsUnsafeIDs(t *testing.T) {
+	srv, _, _ := newTestServer(t, ServerOptions{Token: "", Spender: testSpender})
+
+	resp, err := srv.Client().Post(srv.URL+"/escrow/void/bad%20id", "application/json", nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	resp.Body.Close()
+	if resp.StatusCode != http.StatusBadRequest {
+		t.Errorf("unsafe id status = %d, want 400", resp.StatusCode)
+	}
+}
diff --git a/internal/x402/escrow/settle.go b/internal/x402/escrow/settle.go
new file mode 100644
index 00000000..fa9270d6
--- /dev/null
+++ b/internal/x402/escrow/settle.go
@@ -0,0 +1,342 @@
+package escrow
+
+import (
+	"context"
+	"crypto/ecdsa"
+	"fmt"
+	"math/big"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/ethereum/go-ethereum/accounts/abi"
+	"github.com/ethereum/go-ethereum/accounts/abi/bind"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/common/hexutil"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethclient"
+
+	"github.com/ObolNetwork/obol-stack/internal/erc8004"
+	"github.com/ObolNetwork/obol-stack/internal/x402"
+)
+
+// permit2TransferABI is the minimal ABI fragment for Permit2
+// SignatureTransfer's batch permitTransferFrom. Note the calldata permit tuple
+// carries NO spender — Permit2 enforces spender == msg.sender on-chain, which
+// is exactly why the facilitator address must be signed into the voucher.
+const permit2TransferABI = `[{
+  "name": "permitTransferFrom",
+  "type": "function",
+  "stateMutability": "nonpayable",
+  "inputs": [
+    {"name": "permit", "type": "tuple", "components": [
+      {"name": "permitted", "type": "tuple[]", "components": [
+        {"name": "token", "type": "address"},
+        {"name": "amount", "type": "uint256"}
+      ]},
+      {"name": "nonce", "type": "uint256"},
+      {"name": "deadline", "type": "uint256"}
+    ]},
+    {"name": "transferDetails", "type": "tuple[]", "components": [
+      {"name": "to", "type": "address"},
+      {"name": "requestedAmount", "type": "uint256"}
+    ]},
+    {"name": "owner", "type": "address"},
+    {"name": "signature", "type": "bytes"}
+  ],
+  "outputs": []
+}]`
+
+// permit2ABI parses permit2TransferABI once and caches both the parsed ABI
+// and any parse error for every later call.
+var permit2ABI = sync.OnceValues(func() (abi.ABI, error) {
+	return abi.JSON(strings.NewReader(permit2TransferABI))
+})
+
+// permit2TokenPermissions mirrors Permit2's TokenPermissions struct for ABI
+// packing (component names token/amount map to these fields).
+type permit2TokenPermissions struct {
+	Token  common.Address
+	Amount *big.Int
+}
+
+// permit2PermitBatchTransferFrom mirrors ISignatureTransfer.PermitBatchTransferFrom.
+type permit2PermitBatchTransferFrom struct {
+	Permitted []permit2TokenPermissions
+	Nonce     *big.Int
+	Deadline  *big.Int
+}
+
+// permit2SignatureTransferDetails mirrors ISignatureTransfer.SignatureTransferDetails.
+type permit2SignatureTransferDetails struct {
+	To              common.Address
+	RequestedAmount *big.Int
+}
+
+// TransferDetail is one entry of the on-chain transferDetails array. It pairs
+// INDEX-WISE with the voucher's Recipients/permitted array: omitted seats stay
+// in the array with Amount 0 (Permit2 allows requesting less than permitted,
+// including zero) — the array is never shortened.
+type TransferDetail struct { + To common.Address + Amount *big.Int +} + +// HumanToAtomic converts a human-unit decimal amount (e.g. "500.00") to +// atomic token units ("500000000" at 6 decimals) without float rounding. +// Trailing zeros beyond the token's precision are tolerated; non-zero +// sub-atomic remainders are an error. Every voucher seat amount and every +// capture recipient amount MUST be atomic — BuildTransferDetails matches +// requested recipients against signed seats with exact integer comparison. +func HumanToAtomic(amount string, decimals int) (string, error) { + s := strings.TrimSpace(amount) + if s == "" { + return "", fmt.Errorf("amount is empty") + } + whole, frac, _ := strings.Cut(s, ".") + if whole == "" { + whole = "0" + } + for _, r := range whole + frac { + if r < '0' || r > '9' { + return "", fmt.Errorf("amount %q is not a non-negative decimal number", amount) + } + } + if len(frac) > decimals { + if strings.Trim(frac[decimals:], "0") != "" { + return "", fmt.Errorf("amount %q has more than %d decimal places", amount, decimals) + } + frac = frac[:decimals] + } + frac += strings.Repeat("0", decimals-len(frac)) + v, ok := new(big.Int).SetString(whole+frac, 10) + if !ok { + return "", fmt.Errorf("amount %q is not a decimal number", amount) + } + if v.Sign() <= 0 { + return "", fmt.Errorf("amount %q must be positive", amount) + } + return v.String(), nil +} + +// BuildTransferDetails validates that requested is a subset of the voucher's +// recipient seats with exactly matching per-seat amounts, and returns the +// index-wise transferDetails array: matched seats carry their signed amount, +// omitted seats carry zero (unpaid). Duplicate seats are consumed one +// requested entry per seat. 
+func BuildTransferDetails(v Permit2Voucher, requested []BatchRecipient) ([]TransferDetail, error) { + if len(v.Recipients) == 0 { + return nil, fmt.Errorf("escrow settle: voucher has no recipients") + } + if len(requested) == 0 { + return nil, fmt.Errorf("escrow settle: no recipients requested") + } + + consumed := make([]bool, len(v.Recipients)) + amounts := make([]*big.Int, len(v.Recipients)) + for _, r := range requested { + want, err := parsePositiveAmount("requested amount", r.Amount) + if err != nil { + return nil, err + } + matched := false + for i, seat := range v.Recipients { + if consumed[i] || !strings.EqualFold(seat.Address, r.Address) { + continue + } + signed, err := parsePositiveAmount("voucher amount", seat.Amount) + if err != nil { + return nil, err + } + if signed.Cmp(want) != 0 { + continue // amount mismatch on this seat; another seat may match + } + consumed[i] = true + amounts[i] = want + matched = true + break + } + if !matched { + return nil, fmt.Errorf("escrow settle: requested recipient %s amount %s does not match any unconsumed voucher seat", r.Address, r.Amount) + } + } + + details := make([]TransferDetail, len(v.Recipients)) + for i, seat := range v.Recipients { + amount := amounts[i] + if amount == nil { + amount = new(big.Int) // omitted seat: requestedAmount = 0 + } + details[i] = TransferDetail{To: common.HexToAddress(seat.Address), Amount: amount} + } + return details, nil +} + +// BuildPermitTransferFromCalldata packs the permitTransferFrom calldata for a +// signed voucher and the index-wise transferDetails from BuildTransferDetails. +// len(details) must equal len(v.Recipients) — transferDetails[i] pairs with +// permitted[i]. 
+func BuildPermitTransferFromCalldata(v Permit2Voucher, details []TransferDetail) ([]byte, error) { + if err := validateVoucherFields(v); err != nil { + return nil, err + } + if len(details) != len(v.Recipients) { + return nil, fmt.Errorf("escrow settle: transferDetails length %d must equal voucher recipients %d (omitted seats get amount 0, the array is never shortened)", len(details), len(v.Recipients)) + } + + token := common.HexToAddress(v.Token) + permitted := make([]permit2TokenPermissions, len(v.Recipients)) + for i, seat := range v.Recipients { + amount, err := parsePositiveAmount("voucher amount", seat.Amount) + if err != nil { + return nil, err + } + permitted[i] = permit2TokenPermissions{Token: token, Amount: amount} + } + nonce, err := parseUint256("nonce", v.Nonce) + if err != nil { + return nil, err + } + permit := permit2PermitBatchTransferFrom{ + Permitted: permitted, + Nonce: nonce, + Deadline: big.NewInt(v.Deadline), + } + + transferDetails := make([]permit2SignatureTransferDetails, len(details)) + for i, d := range details { + amount := d.Amount + if amount == nil { + amount = new(big.Int) + } + if amount.Sign() < 0 { + return nil, fmt.Errorf("escrow settle: transferDetails[%d] amount is negative", i) + } + transferDetails[i] = permit2SignatureTransferDetails{To: d.To, RequestedAmount: amount} + } + + sig, err := hexutil.Decode(v.Signature) + if err != nil { + return nil, fmt.Errorf("escrow settle: decode voucher signature: %w", err) + } + + parsed, err := permit2ABI() + if err != nil { + return nil, fmt.Errorf("escrow settle: parse permit2 abi: %w", err) + } + calldata, err := parsed.Pack("permitTransferFrom", permit, transferDetails, common.HexToAddress(v.Owner), sig) + if err != nil { + return nil, fmt.Errorf("escrow settle: pack permitTransferFrom: %w", err) + } + return calldata, nil +} + +// Submitter executes a verified voucher on-chain. Abstracted so the server +// and its tests never need a live chain. 
+type Submitter interface { + // Submit sends permitTransferFrom(voucher, details) and waits for the + // receipt. details must pair index-wise with v.Recipients. + Submit(ctx context.Context, v Permit2Voucher, details []TransferDetail) (txHash string, err error) +} + +// erpcNetworkSuffix maps a voucher network alias to the eRPC path segment, +// following the same pattern as erc8004.NewClientForNetwork +// (/, e.g. "ethereum" → "mainnet"). +func erpcNetworkSuffix(network string) string { + if net, err := erc8004.ResolveNetwork(network); err == nil { + return net.ERPCNetwork + } + if info, err := x402.ResolveChainInfo(network); err == nil { + return info.Name + } + return strings.TrimSpace(network) +} + +// EthSubmitter submits permitTransferFrom transactions via JSON-RPC, signing +// locally with Key or remotely via Signer (exactly one should be set). +type EthSubmitter struct { + // RPCBase is the per-network JSON-RPC base; the endpoint is + // /. Defaults to erc8004.DefaultRPCBase + // (in-cluster eRPC). + RPCBase string + // Key signs locally when set. + Key *ecdsa.PrivateKey + // Signer signs via the remote-signer REST API when Key is nil. + Signer *erc8004.RemoteSigner + // SignerAddress is the remote signer's address; resolved via + // Signer.GetAddress when zero. + SignerAddress common.Address + // ReceiptTimeout bounds the wait for the settlement receipt (default 2m). 
+ ReceiptTimeout time.Duration +} + +func (s *EthSubmitter) Submit(ctx context.Context, v Permit2Voucher, details []TransferDetail) (string, error) { + if s.Key == nil && s.Signer == nil { + return "", fmt.Errorf("escrow settle: no signing key configured (set OBOL_ESCROW_KEY or OBOL_ESCROW_SIGNER_URL)") + } + + base := s.RPCBase + if base == "" { + base = erc8004.DefaultRPCBase + } + rpcURL := strings.TrimRight(base, "/") + "/" + erpcNetworkSuffix(v.Network) + eth, err := ethclient.DialContext(ctx, rpcURL) + if err != nil { + return "", fmt.Errorf("escrow settle: dial %s: %w", rpcURL, err) + } + defer eth.Close() + + chainID, err := eth.ChainID(ctx) + if err != nil { + return "", fmt.Errorf("escrow settle: chain id: %w", err) + } + if want, resolveErr := ChainIDForNetwork(v.Network); resolveErr == nil && want.Cmp(chainID) != 0 { + return "", fmt.Errorf("escrow settle: rpc %s reports chain %s but voucher network %q expects %s", rpcURL, chainID, v.Network, want) + } + + calldata, err := BuildPermitTransferFromCalldata(v, details) + if err != nil { + return "", err + } + + var opts *bind.TransactOpts + if s.Key != nil { + opts, err = bind.NewKeyedTransactorWithChainID(s.Key, chainID) + if err != nil { + return "", fmt.Errorf("escrow settle: transactor: %w", err) + } + opts.Context = ctx + } else { + addr := s.SignerAddress + if addr == (common.Address{}) { + addr, err = s.Signer.GetAddress(ctx) + if err != nil { + return "", fmt.Errorf("escrow settle: resolve remote signer address: %w", err) + } + } + opts = s.Signer.RemoteTransactOpts(ctx, addr, chainID) + } + + parsed, err := permit2ABI() + if err != nil { + return "", fmt.Errorf("escrow settle: parse permit2 abi: %w", err) + } + contract := bind.NewBoundContract(common.HexToAddress(Permit2Address), parsed, eth, eth, eth) + tx, err := contract.RawTransact(opts, calldata) + if err != nil { + return "", fmt.Errorf("escrow settle: submit permitTransferFrom: %w", err) + } + + timeout := s.ReceiptTimeout + if timeout <= 
0 { + timeout = 2 * time.Minute + } + waitCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + receipt, err := bind.WaitMined(waitCtx, eth, tx) + if err != nil { + return "", fmt.Errorf("escrow settle: wait receipt for %s: %w", tx.Hash().Hex(), err) + } + if receipt.Status != types.ReceiptStatusSuccessful { + return "", fmt.Errorf("escrow settle: permitTransferFrom %s reverted", tx.Hash().Hex()) + } + return tx.Hash().Hex(), nil +} diff --git a/internal/x402/escrow/settle_test.go b/internal/x402/escrow/settle_test.go new file mode 100644 index 00000000..b6ac8549 --- /dev/null +++ b/internal/x402/escrow/settle_test.go @@ -0,0 +1,184 @@ +package escrow + +import ( + "encoding/hex" + "math/big" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestBuildTransferDetails_FullAndSubset(t *testing.T) { + v, _ := goldenVoucher(t) + + // Full capture: every seat paid. + full, err := BuildTransferDetails(v, v.Recipients) + if err != nil { + t.Fatalf("full capture: %v", err) + } + if len(full) != 2 || full[0].Amount.Cmp(big.NewInt(1000)) != 0 || full[1].Amount.Cmp(big.NewInt(2500)) != 0 { + t.Fatalf("full details = %+v", full) + } + + // Subset: only the second seat paid; the first stays in the array at 0 + // (index-wise pairing with permitted[i], never shortened). 
+ subset, err := BuildTransferDetails(v, []BatchRecipient{{Address: strings.ToLower(v.Recipients[1].Address), Amount: "2500"}}) + if err != nil { + t.Fatalf("subset capture: %v", err) + } + if len(subset) != 2 { + t.Fatalf("subset details length = %d, want 2 (omitted seats stay at zero)", len(subset)) + } + if subset[0].Amount.Sign() != 0 { + t.Errorf("omitted seat amount = %s, want 0", subset[0].Amount) + } + if subset[0].To != common.HexToAddress(v.Recipients[0].Address) { + t.Errorf("omitted seat To = %s, want voucher seat address", subset[0].To.Hex()) + } + if subset[1].Amount.Cmp(big.NewInt(2500)) != 0 { + t.Errorf("paid seat amount = %s, want 2500", subset[1].Amount) + } +} + +func TestBuildTransferDetails_Errors(t *testing.T) { + v, _ := goldenVoucher(t) + + // Recipient not in the voucher. + if _, err := BuildTransferDetails(v, []BatchRecipient{{Address: testSpender.Hex(), Amount: "1000"}}); err == nil { + t.Error("unknown recipient should fail") + } + // Amount differs from the signed seat amount. + if _, err := BuildTransferDetails(v, []BatchRecipient{{Address: v.Recipients[0].Address, Amount: "999"}}); err == nil { + t.Error("amount mismatch should fail") + } + // More than the signed amount. + if _, err := BuildTransferDetails(v, []BatchRecipient{{Address: v.Recipients[0].Address, Amount: "100000"}}); err == nil { + t.Error("over-request should fail") + } + // Empty request. + if _, err := BuildTransferDetails(v, nil); err == nil { + t.Error("empty request should fail") + } + // Same seat requested twice (only one seat exists at that address). 
+ if _, err := BuildTransferDetails(v, []BatchRecipient{ + {Address: v.Recipients[0].Address, Amount: "1000"}, + {Address: v.Recipients[0].Address, Amount: "1000"}, + }); err == nil { + t.Error("double-spending one seat should fail") + } +} + +func TestBuildTransferDetails_DuplicateSeats(t *testing.T) { + v, _ := goldenVoucher(t) + addr := v.Recipients[0].Address + v.Recipients = []BatchRecipient{ + {Address: addr, Amount: "1000"}, + {Address: addr, Amount: "1000"}, + } + + // Two identical seats: requesting twice consumes both. + details, err := BuildTransferDetails(v, []BatchRecipient{ + {Address: addr, Amount: "1000"}, + {Address: addr, Amount: "1000"}, + }) + if err != nil { + t.Fatalf("duplicate seats: %v", err) + } + if details[0].Amount.Cmp(big.NewInt(1000)) != 0 || details[1].Amount.Cmp(big.NewInt(1000)) != 0 { + t.Fatalf("details = %+v", details) + } + + // Requesting once pays one seat, leaves the other at zero. + one, err := BuildTransferDetails(v, []BatchRecipient{{Address: addr, Amount: "1000"}}) + if err != nil { + t.Fatal(err) + } + if one[0].Amount.Cmp(big.NewInt(1000)) != 0 || one[1].Amount.Sign() != 0 { + t.Fatalf("one-seat details = %+v", one) + } +} + +// goldenCalldata is the exact permitTransferFrom calldata for goldenVoucher +// signed with anvil key 0 on chain 84532, capturing ONLY the first seat — +// the second seat appears index-wise with requestedAmount 0. +// Selector edd9444b = keccak("permitTransferFrom(((address,uint256)[],uint256,uint256),(address,uint256)[],address,bytes)")[:4]. 
+const goldenCalldata = "edd9444b" + + "0000000000000000000000000000000000000000000000000000000000000080" + // permit tuple offset + "0000000000000000000000000000000000000000000000000000000000000180" + // transferDetails offset + "000000000000000000000000f39fd6e51aad88f6f4ce6ab8827279cfffb92266" + // owner + "0000000000000000000000000000000000000000000000000000000000000220" + // signature offset + "0000000000000000000000000000000000000000000000000000000000000060" + // permit.permitted offset + "0000000000000000000000000000000000000000000000000000000000000001" + // nonce = 1 + "0000000000000000000000000000000000000000000000000000000070dbd880" + // deadline = 1893456000 + "0000000000000000000000000000000000000000000000000000000000000002" + // permitted length + "000000000000000000000000036cbd53842c5426634e7929541ec2318f3dcf7e" + // permitted[0].token + "00000000000000000000000000000000000000000000000000000000000003e8" + // permitted[0].amount = 1000 + "000000000000000000000000036cbd53842c5426634e7929541ec2318f3dcf7e" + // permitted[1].token + "00000000000000000000000000000000000000000000000000000000000009c4" + // permitted[1].amount = 2500 + "0000000000000000000000000000000000000000000000000000000000000002" + // transferDetails length + "0000000000000000000000003c44cdddb6a900fa2b585dd299e03d12fa4293bc" + // details[0].to + "00000000000000000000000000000000000000000000000000000000000003e8" + // details[0].requestedAmount = 1000 + "00000000000000000000000090f79bf6eb2c4f870365e785982e1f101e93b906" + // details[1].to (omitted seat) + "0000000000000000000000000000000000000000000000000000000000000000" + // details[1].requestedAmount = 0 + "0000000000000000000000000000000000000000000000000000000000000041" + // signature length (65) + "8eb05e00fa60ef44b63ec69978e25ce2d2f3a142ce3d603e89b4e8c06811555a" + + "7c41076a83d3f1b24405b7418cb4041b269325c2f4fae161f01460aab0cb6f40" + + "1c00000000000000000000000000000000000000000000000000000000000000" + +func 
TestBuildPermitTransferFromCalldata_Golden(t *testing.T) { + v, key := goldenVoucher(t) + chainID := big.NewInt(84532) + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + + details, err := BuildTransferDetails(v, []BatchRecipient{{Address: v.Recipients[0].Address, Amount: "1000"}}) + if err != nil { + t.Fatal(err) + } + calldata, err := BuildPermitTransferFromCalldata(v, details) + if err != nil { + t.Fatalf("BuildPermitTransferFromCalldata: %v", err) + } + if got := hex.EncodeToString(calldata); got != goldenCalldata { + t.Errorf("calldata mismatch:\n got %s\nwant %s", got, goldenCalldata) + } + + // Independent cross-check: the ABI fragment must produce the canonical + // batch permitTransferFrom selector. + wantSelector := crypto.Keccak256([]byte("permitTransferFrom(((address,uint256)[],uint256,uint256),(address,uint256)[],address,bytes)"))[:4] + if hex.EncodeToString(calldata[:4]) != hex.EncodeToString(wantSelector) { + t.Errorf("selector = %x, want %x", calldata[:4], wantSelector) + } +} + +func TestBuildPermitTransferFromCalldata_LengthInvariant(t *testing.T) { + v, key := goldenVoucher(t) + chainID := big.NewInt(84532) + if err := SignVoucher(&v, chainID, key); err != nil { + t.Fatal(err) + } + + // transferDetails must pair index-wise with permitted — shortening it is + // a hard error, not a silent re-pairing. 
+ short := []TransferDetail{{To: common.HexToAddress(v.Recipients[0].Address), Amount: big.NewInt(1000)}} + if _, err := BuildPermitTransferFromCalldata(v, short); err == nil { + t.Error("shortened transferDetails should fail") + } +} + +func TestErpcNetworkSuffix(t *testing.T) { + for _, tc := range []struct{ network, want string }{ + {"ethereum", "mainnet"}, // erc8004 eRPC alias convention + {"base-sepolia", "base-sepolia"}, + {"base", "base"}, + {"eip155:84532", "base-sepolia"}, // CAIP-2 falls through to the x402 registry + {"my-custom-net", "my-custom-net"}, + } { + if got := erpcNetworkSuffix(tc.network); got != tc.want { + t.Errorf("erpcNetworkSuffix(%q) = %q, want %q", tc.network, got, tc.want) + } + } +} diff --git a/internal/x402/escrow/store.go b/internal/x402/escrow/store.go new file mode 100644 index 00000000..495bdc70 --- /dev/null +++ b/internal/x402/escrow/store.go @@ -0,0 +1,109 @@ +package escrow + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + "time" +) + +// StateAwaitingVoucher is the escrow entry state between a voucher-less +// Reserve (which surfaces the facilitator's spender address for the signer to +// bind) and the re-reserve that attaches the signed voucher. +const StateAwaitingVoucher = "AwaitingVoucher" + +// Entry is one escrow's persisted lifecycle record. +type Entry struct { + // ID is the escrow key (the ServiceBounty UID). + ID string `json:"id"` + // State is one of AwaitingVoucher | Reserved | Captured | Voided. + State string `json:"state"` + // Request is the last accepted reserve request, including the voucher. + Request *ReserveRequest `json:"request,omitempty"` + // Receipt is the receipt last returned for this entry. + Receipt Receipt `json:"receipt"` + // UpdatedAt is the last state-transition time. + UpdatedAt time.Time `json:"updatedAt"` +} + +// Store is a file-backed JSON escrow store: one .json per entry, written +// via temp-file + atomic rename so a crash never leaves a torn entry. 
+type Store struct { + dir string + mu sync.Mutex +} + +// NewStore creates (if needed) and opens the state directory. +func NewStore(dir string) (*Store, error) { + if dir == "" { + return nil, fmt.Errorf("escrow store: empty state dir") + } + if err := os.MkdirAll(dir, 0o755); err != nil { + return nil, fmt.Errorf("escrow store: create %s: %w", dir, err) + } + return &Store{dir: dir}, nil +} + +func (s *Store) path(id string) string { + return filepath.Join(s.dir, id+".json") +} + +// Get loads the entry for id; ok is false when none exists. +func (s *Store) Get(id string) (Entry, bool, error) { + s.mu.Lock() + defer s.mu.Unlock() + + raw, err := os.ReadFile(s.path(id)) + if os.IsNotExist(err) { + return Entry{}, false, nil + } + if err != nil { + return Entry{}, false, fmt.Errorf("escrow store: read %s: %w", id, err) + } + var e Entry + if err := json.Unmarshal(raw, &e); err != nil { + return Entry{}, false, fmt.Errorf("escrow store: decode %s: %w", id, err) + } + return e, true, nil +} + +// Put persists the entry atomically (write temp file, fsync, rename). 
+func (s *Store) Put(e Entry) error { + if e.ID == "" { + return fmt.Errorf("escrow store: entry has no id") + } + s.mu.Lock() + defer s.mu.Unlock() + + raw, err := json.MarshalIndent(e, "", " ") + if err != nil { + return fmt.Errorf("escrow store: encode %s: %w", e.ID, err) + } + + tmp, err := os.CreateTemp(s.dir, "."+e.ID+".tmp-*") + if err != nil { + return fmt.Errorf("escrow store: temp file for %s: %w", e.ID, err) + } + tmpName := tmp.Name() + if _, err := tmp.Write(raw); err != nil { + tmp.Close() + os.Remove(tmpName) + return fmt.Errorf("escrow store: write %s: %w", e.ID, err) + } + if err := tmp.Sync(); err != nil { + tmp.Close() + os.Remove(tmpName) + return fmt.Errorf("escrow store: sync %s: %w", e.ID, err) + } + if err := tmp.Close(); err != nil { + os.Remove(tmpName) + return fmt.Errorf("escrow store: close %s: %w", e.ID, err) + } + if err := os.Rename(tmpName, s.path(e.ID)); err != nil { + os.Remove(tmpName) + return fmt.Errorf("escrow store: rename %s: %w", e.ID, err) + } + return nil +} diff --git a/internal/x402/serviceoffer_source.go b/internal/x402/serviceoffer_source.go index e44a3197..77a852f7 100644 --- a/internal/x402/serviceoffer_source.go +++ b/internal/x402/serviceoffer_source.go @@ -181,6 +181,30 @@ func routeRuleFromOffer(offer *monetizeapi.ServiceOffer, upstreamAuth string) (R MaxTimeoutSeconds: offer.Spec.Payment.MaxTimeoutSeconds, } + // MPP credit-card offers carry off-chain Stripe settlement terms instead + // of the crypto payTo/network/asset. Populate the card route so the + // verifier gates this offer through serveCardGated (matchPaidRouteFull / + // HandleProxy dispatch on rule.IsCard()). 
+ if strings.EqualFold(offer.Spec.Payment.Method, "card") && offer.Spec.Payment.Card != nil { + c := offer.Spec.Payment.Card + currency := strings.ToLower(strings.TrimSpace(c.Currency)) + if currency == "" { + currency = defaultCardCurrency + } + provider := c.Provider + if provider == "" { + provider = cardNetworkStripe + } + rule.Card = &CardRoute{ + Provider: provider, + Account: c.Account, + Currency: currency, + Decimals: currencyMinorUnits(currency), + NetworkID: c.NetworkID, + PaymentMethodTypes: append([]string(nil), c.PaymentMethodTypes...), + } + } + if offer.IsAgent() && offer.Status.AgentResolution != nil { res := offer.Status.AgentResolution rule.AgentModel = res.Model @@ -199,6 +223,16 @@ func routeRuleFromOffer(offer *monetizeapi.ServiceOffer, upstreamAuth string) (R rule.DatasetFileHash = strings.ToLower(offer.Spec.Dataset.FileHash) rule.DatasetSizeBytes = offer.Spec.Dataset.SizeBytes } + // Skill offers advertise the bundle identity + integrity hash so the + // 402 response carries extra.skill (mirrors the agent extras above). + // Upstream URL/auth need no special-casing: spec.upstream points at + // the controller-rendered bundle server and effectiveUpstreamAuth + // returns "" for non-litellm services. 
+ if offer.IsSkill() { + rule.SkillName = offer.Spec.Skill.Name + rule.SkillVersion = offer.Spec.Skill.Version + rule.SkillSHA256 = strings.ToLower(offer.Spec.Skill.SHA256) + } return rule, nil } diff --git a/internal/x402/serviceoffer_source_skill_test.go b/internal/x402/serviceoffer_source_skill_test.go new file mode 100644 index 00000000..8bdae8df --- /dev/null +++ b/internal/x402/serviceoffer_source_skill_test.go @@ -0,0 +1,119 @@ +package x402 + +import ( + "strings" + "testing" + + "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func skillSourceTestOffer() monetizeapi.ServiceOffer { + return monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "buy-x402", Namespace: "hermes-obol-agent"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "skill", + Skill: monetizeapi.ServiceOfferSkill{ + Name: "buy-x402", + Version: "0.1.0", + SHA256: strings.Repeat("0a", 32), + BundleConfigMap: "buy-x402-skill-bundle", + }, + Upstream: monetizeapi.ServiceOfferUpstream{ + Service: monetizeapi.SkillBundleWorkloadName("buy-x402"), + Namespace: "hermes-obol-agent", + Port: 8080, + HealthPath: "/skill.json", + }, + Payment: monetizeapi.ServiceOfferPayment{ + PayTo: "0x1111111111111111111111111111111111111111", + Network: "base-sepolia", + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.01"}, + }, + }, + Status: monetizeapi.ServiceOfferStatus{ + Conditions: []monetizeapi.Condition{{Type: "RoutePublished", Status: "True"}}, + }, + } +} + +func TestRouteRuleFromOffer_SkillPopulatesSkillFields(t *testing.T) { + offer := skillSourceTestOffer() + + rule, err := routeRuleFromOffer(&offer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + + if rule.SkillName != "buy-x402" { + t.Errorf("SkillName = %q, want buy-x402", rule.SkillName) + } + if rule.SkillVersion != "0.1.0" { + t.Errorf("SkillVersion = %q, want 0.1.0", rule.SkillVersion) + } + if rule.SkillSHA256 != strings.Repeat("0a", 32) { + 
t.Errorf("SkillSHA256 = %q", rule.SkillSHA256) + } + if rule.OfferType != "skill" { + t.Errorf("OfferType = %q, want skill", rule.OfferType) + } + + // The upstream URL must be the controller-rendered bundle server, + // derived from spec.upstream with no skill-specific synthesis. + wantURL := "http://so-buy-x402-bundle.hermes-obol-agent.svc.cluster.local:8080" + if rule.UpstreamURL != wantURL { + t.Errorf("UpstreamURL = %q, want %q", rule.UpstreamURL, wantURL) + } + if rule.Pattern != "/services/buy-x402/*" { + t.Errorf("Pattern = %q, want /services/buy-x402/*", rule.Pattern) + } +} + +func TestRouteRuleFromOffer_SkillUppercaseHashNormalizedToLower(t *testing.T) { + offer := skillSourceTestOffer() + offer.Spec.Skill.SHA256 = strings.ToUpper(strings.Repeat("0a", 32)) + + rule, err := routeRuleFromOffer(&offer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + if rule.SkillSHA256 != strings.Repeat("0a", 32) { + t.Errorf("SkillSHA256 = %q, want lowercase", rule.SkillSHA256) + } +} + +func TestRouteRuleFromOffer_SkillUpstreamAuthStaysEmpty(t *testing.T) { + // Even if a litellm master key exists for the namespace, the bundle + // server is a static file host — no Authorization header may be + // injected (effectiveUpstreamAuth only injects for litellm/agent). 
+ offer := skillSourceTestOffer() + + rule, err := routeRuleFromOffer(&offer, "Bearer should-not-leak") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + if rule.UpstreamAuth != "" { + t.Errorf("UpstreamAuth = %q, want empty for skill bundle upstream", rule.UpstreamAuth) + } +} + +func TestRouteRuleFromOffer_NonSkillOffersCarryNoSkillFields(t *testing.T) { + offer := monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "plain", Namespace: "llm"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "http", + Upstream: monetizeapi.ServiceOfferUpstream{Service: "httpbin", Namespace: "llm", Port: 8080}, + Payment: monetizeapi.ServiceOfferPayment{ + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.01"}, + }, + }, + } + + rule, err := routeRuleFromOffer(&offer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + if rule.SkillName != "" || rule.SkillVersion != "" || rule.SkillSHA256 != "" { + t.Errorf("non-skill rule gained skill fields: %q %q %q", rule.SkillName, rule.SkillVersion, rule.SkillSHA256) + } +} diff --git a/internal/x402/serviceoffer_source_test.go b/internal/x402/serviceoffer_source_test.go index 90c28751..4fef21c8 100644 --- a/internal/x402/serviceoffer_source_test.go +++ b/internal/x402/serviceoffer_source_test.go @@ -407,3 +407,65 @@ func mustSecretObject(t *testing.T, namespace, name string, data map[string]stri }} return obj } + +func TestRouteRuleFromOffer_CardPaymentPopulatesCardRoute(t *testing.T) { + offer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "card-svc", Namespace: "shop"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "http", + Upstream: monetizeapi.ServiceOfferUpstream{Service: "api", Namespace: "shop", Port: 8080}, + Payment: monetizeapi.ServiceOfferPayment{ + Method: "card", + Card: &monetizeapi.ServiceOfferCardPayment{ + Provider: "stripe", + Account: "acct_shop1", + Currency: "jpy", + NetworkID: "stripenet_1", + }, + Price: 
monetizeapi.ServiceOfferPriceTable{PerRequest: "100"}, + }, + }, + } + + route, err := routeRuleFromOffer(offer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer: %v", err) + } + if !route.IsCard() { + t.Fatal("expected a card route") + } + if route.Card.Account != "acct_shop1" || route.Card.Provider != "stripe" { + t.Errorf("card = %+v", route.Card) + } + // jpy currency derives 0 minor-unit decimals. + if route.Card.Currency != "jpy" || route.Card.Decimals != 0 { + t.Errorf("currency/decimals = %q/%d, want jpy/0", route.Card.Currency, route.Card.Decimals) + } + if route.Card.NetworkID != "stripenet_1" { + t.Errorf("networkId = %q, want stripenet_1", route.Card.NetworkID) + } + // The built requirement uses jpy minor units: ¥100 -> "100". + if amt := buildCardRequirement(&route).Amount; amt != "100" { + t.Errorf("card requirement amount = %q, want 100", amt) + } + + // A crypto offer must NOT produce a card route. + cryptoOffer := &monetizeapi.ServiceOffer{ + ObjectMeta: metav1.ObjectMeta{Name: "c", Namespace: "n"}, + Spec: monetizeapi.ServiceOfferSpec{ + Type: "http", + Payment: monetizeapi.ServiceOfferPayment{ + Network: "base", + PayTo: "0x1111111111111111111111111111111111111111", + Price: monetizeapi.ServiceOfferPriceTable{PerRequest: "0.01"}, + }, + }, + } + cr, err := routeRuleFromOffer(cryptoOffer, "") + if err != nil { + t.Fatalf("routeRuleFromOffer(crypto): %v", err) + } + if cr.IsCard() { + t.Error("crypto offer must not produce a card route") + } +} diff --git a/internal/x402/skill_extras_test.go b/internal/x402/skill_extras_test.go new file mode 100644 index 00000000..b04d5525 --- /dev/null +++ b/internal/x402/skill_extras_test.go @@ -0,0 +1,138 @@ +package x402 + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + x402types "github.com/x402-foundation/x402/go/types" +) + +func TestMergeSkillExtras_Noop_NonSkillRule(t *testing.T) { + req := x402types.PaymentRequirements{Extra: map[string]any{"name": 
"USDC"}} + rule := &RouteRule{} + + mergeSkillExtras(&req, rule) + + if _, ok := req.Extra["skill"]; ok { + t.Error("non-skill rule must not add extra.skill") + } + if got := req.Extra["name"]; got != "USDC" { + t.Errorf("non-skill merge clobbered existing extra.name: %v", got) + } +} + +func TestMergeSkillExtras_AddsSkillBlock(t *testing.T) { + req := x402types.PaymentRequirements{Extra: map[string]any{}} + rule := &RouteRule{ + SkillName: "buy-x402", + SkillVersion: "0.1.0", + SkillSHA256: strings.Repeat("ab", 32), + } + + mergeSkillExtras(&req, rule) + + skill, ok := req.Extra["skill"].(map[string]any) + if !ok { + t.Fatalf("extra.skill wrong type: %T", req.Extra["skill"]) + } + if skill["name"] != "buy-x402" { + t.Errorf("skill.name = %v, want buy-x402", skill["name"]) + } + if skill["version"] != "0.1.0" { + t.Errorf("skill.version = %v, want 0.1.0", skill["version"]) + } + if skill["sha256"] != strings.Repeat("ab", 32) { + t.Errorf("skill.sha256 = %v", skill["sha256"]) + } +} + +func TestMergeSkillExtras_InitialisesNilExtra(t *testing.T) { + req := x402types.PaymentRequirements{} + rule := &RouteRule{SkillName: "buy-x402"} + + mergeSkillExtras(&req, rule) + + if req.Extra == nil { + t.Fatal("Extra not initialised") + } + skill, ok := req.Extra["skill"].(map[string]any) + if !ok || skill["name"] != "buy-x402" { + t.Errorf("extra.skill missing or malformed: %+v", req.Extra) + } + if _, ok := skill["version"]; ok { + t.Error("empty version must be omitted from extra.skill") + } + if _, ok := skill["sha256"]; ok { + t.Error("empty sha256 must be omitted from extra.skill") + } +} + +// TestVerifier_402_SkillExtra exercises the full 402 path for a type=skill +// route: a paymentless probe must surface accepts[].extra.skill = +// {name, version, sha256} in the JSON body (the wire contract buyers use +// to verify the artifact before paying), while a non-skill route must not +// gain the key. Modeled on the agent-extras coverage. 
+func TestVerifier_402_SkillExtra(t *testing.T) { + fac := newMockFacilitator(t, mockFacilitatorOpts{}) + sha := strings.Repeat("0a", 32) + v := newTestVerifier(t, fac.URL, []RouteRule{ + { + Pattern: "/services/buy-x402/*", + Price: "0.01", + OfferType: "skill", + SkillName: "buy-x402", + SkillVersion: "0.1.0", + SkillSHA256: sha, + }, + { + Pattern: "/services/plain-http/*", + Price: "0.01", + }, + }) + + probe402 := func(t *testing.T, uri string) map[string]any { + t.Helper() + req := httptest.NewRequest(http.MethodGet, "/verify", nil) + req.Header.Set("X-Forwarded-Uri", uri) + req.Header.Set("X-Forwarded-Host", "obol.stack") + w := httptest.NewRecorder() + v.HandleVerify(w, req) + if w.Code != http.StatusPaymentRequired { + t.Fatalf("status = %d, want 402", w.Code) + } + body, _ := io.ReadAll(w.Body) + var parsed struct { + Accepts []map[string]any `json:"accepts"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + t.Fatalf("402 body is not JSON: %v\n%s", err, body) + } + if len(parsed.Accepts) != 1 { + t.Fatalf("accepts = %d entries, want 1", len(parsed.Accepts)) + } + extra, _ := parsed.Accepts[0]["extra"].(map[string]any) + return extra + } + + t.Run("skill route advertises extra.skill", func(t *testing.T) { + extra := probe402(t, "/services/buy-x402/bundle.tar.gz") + skill, ok := extra["skill"].(map[string]any) + if !ok { + t.Fatalf("extra.skill missing or wrong shape: %+v", extra) + } + if skill["name"] != "buy-x402" || skill["version"] != "0.1.0" || skill["sha256"] != sha { + t.Errorf("extra.skill = %+v, want name/version/sha256 populated", skill) + } + }) + + t.Run("non-skill route emits no extra.skill", func(t *testing.T) { + extra := probe402(t, "/services/plain-http/anything") + if _, ok := extra["skill"]; ok { + t.Errorf("non-skill route must not advertise extra.skill: %+v", extra) + } + }) +} diff --git a/internal/x402/verifier.go b/internal/x402/verifier.go index 9cc54541..48adb307 100644 --- a/internal/x402/verifier.go +++ 
b/internal/x402/verifier.go @@ -234,6 +234,13 @@ func (v *Verifier) HandleProxy(w http.ResponseWriter, r *http.Request) { return } + // MPP credit-card offers gate through Stripe (authorize -> capture/cancel) + // instead of the x402 facilitator ForwardAuth path. + if rule.IsCard() { + v.serveCardGated(w, r, rule, requirement, extensions, proxy, defaultCardGateway, defaultSPTGuard) + return + } + wallet := cfg.Wallet if rule.PayTo != "" { wallet = rule.PayTo @@ -313,6 +320,12 @@ func (v *Verifier) matchPaidRouteFull(cfg *PricingConfig, uri string) (*RouteRul return nil, x402types.PaymentRequirements{}, nil, nil, ChainInfo{}, AssetInfo{}, false } + // Card routes settle off-chain via Stripe; skip chain/asset resolution + // and emit the MPP credit-card 402 option instead. + if rule.IsCard() { + return rule, buildCardRequirement(rule), nil, prometheusLabels(rule), ChainInfo{}, AssetInfo{}, true + } + wallet := cfg.Wallet if rule.PayTo != "" { wallet = rule.PayTo @@ -334,6 +347,7 @@ func (v *Verifier) matchPaidRouteFull(cfg *PricingConfig, uri string) (*RouteRul requirement := BuildV2RequirementWithAsset(chain, asset, rule.Price, wallet, rule.MaxTimeoutSeconds) mergeAgentExtras(&requirement, rule) mergeDatasetExtras(&requirement, rule) + mergeSkillExtras(&requirement, rule) extensions := WithBazaar(BuildExtensionsForAsset(asset), rule.OfferType, rule.Model) return rule, requirement, extensions, prometheusLabels(rule), chain, asset, true } @@ -425,6 +439,28 @@ func mergeDatasetExtras(req *x402types.PaymentRequirements, rule *RouteRule) { req.Extra["dataset"] = dataset } +// mergeSkillExtras adds the skill bundle identity from a RouteRule to the +// requirement's Extra map as extra.skill = {name, version, sha256} so +// buyers probing a 402 on a type=skill offer can verify the artifact they +// are about to pay for. No-op for non-skill rules (SkillName empty). +// Strictly additive — mirrors mergeAgentExtras above. 
+func mergeSkillExtras(req *x402types.PaymentRequirements, rule *RouteRule) { + if rule.SkillName == "" { + return + } + if req.Extra == nil { + req.Extra = make(map[string]interface{}) + } + skill := map[string]any{"name": rule.SkillName} + if rule.SkillVersion != "" { + skill["version"] = rule.SkillVersion + } + if rule.SkillSHA256 != "" { + skill["sha256"] = rule.SkillSHA256 + } + req.Extra["skill"] = skill +} + // buildPaymentDisplay turns the matched rule + chain + asset into pre-formatted // strings for the HTML 402 page. The atomic-amount input is the value already // computed for the wire requirement (rule.Price * 10^decimals), so passing diff --git a/internal/x402/verifier_test.go b/internal/x402/verifier_test.go index 036a9298..a20ae64f 100644 --- a/internal/x402/verifier_test.go +++ b/internal/x402/verifier_test.go @@ -13,9 +13,9 @@ import ( "testing" "time" - x402types "github.com/x402-foundation/x402/go/types" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + x402types "github.com/x402-foundation/x402/go/types" ) // ── Mock facilitator ──────────────────────────────────────────────────────── diff --git a/internal/x402mcp/bountyreport.go b/internal/x402mcp/bountyreport.go new file mode 100644 index 00000000..ccd4824a --- /dev/null +++ b/internal/x402mcp/bountyreport.go @@ -0,0 +1,205 @@ +package x402mcp + +// bounty_report — a FREE companion tool on the MCP server: serves the A2UI +// report deliverable of a settled ServiceBounty. Reports are gate:local in v1 +// (the fulfiller's runner persists them on disk under the agent hierarchy); +// the cross-party paid gate (gate: mcp-x402) is this same tool wrapped with +// the existing payment wrapper — no new machinery. +// +// Variant selection is a2ui catalog negotiation: the caller passes its +// supportedCatalogIds in preference order and the first task-package variant +// whose catalogId matches wins. 
kind=declarative returns the raw A2UI +// v1.0-candidate message-list JSON (native render, no iframes); kind=mcp-app +// wraps the self-contained HTML into a `custom` McpApp node with url_encoded +// content — the CLIENT supplies double-iframe isolation (sandbox proxy + +// srcdoc inner frame, never allow-same-origin); this server only returns JSON. + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + + mcpsdk "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/ObolNetwork/obol-stack/internal/bounty" +) + +type bountyReportArgs struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + TaskType string `json:"taskType"` + SupportedCatalogIDs []string `json:"supportedCatalogIds"` +} + +// bountyReportMeta is the optional task.json sidecar the runner writes next to +// the report files, removing task-type inference ambiguity. +type bountyReportMeta struct { + TypeRef string `json:"typeRef"` +} + +// AddBountyReportTool registers the free bounty_report tool. reportsDir layout: +// {reportsDir}/{namespace}/{name}/{surface-file} (+ optional task.json +// sidecar {"typeRef":"benchmark@v1"}). +func AddBountyReportTool(server *mcpsdk.Server, reportsDir string) { + server.AddTool(&mcpsdk.Tool{ + Name: "bounty_report", + Description: "Fetch a ServiceBounty's A2UI report. Pass supportedCatalogIds in preference " + + "order (a2ui catalog negotiation): a declarative match returns the A2UI v1.0 message list; " + + "obol.org:mcp-app/v1 returns a custom McpApp node (self-contained HTML, render in the " + + "double-iframe sandbox). 
Args: {name, namespace?, taskType?, supportedCatalogIds?}.", + InputSchema: map[string]any{ + "type": "object", + "required": []string{"name"}, + "properties": map[string]any{ + "name": map[string]any{"type": "string", "description": "Bounty name."}, + "namespace": map[string]any{"type": "string", "description": "Bounty namespace (default hermes-obol-agent)."}, + "taskType": map[string]any{"type": "string", "description": "Task type ref (e.g. benchmark@v1); inferred from the task.json sidecar or the report files when omitted."}, + "supportedCatalogIds": map[string]any{"type": "array", "items": map[string]any{"type": "string"}, "description": "Client-supported catalog ids in preference order."}, + }, + }, + }, func(_ context.Context, req *mcpsdk.CallToolRequest) (*mcpsdk.CallToolResult, error) { + var args bountyReportArgs + if len(req.Params.Arguments) > 0 { + if err := json.Unmarshal(req.Params.Arguments, &args); err != nil { + return errResult(fmt.Sprintf("bad arguments: %v", err)), nil + } + } + out, err := renderBountyReport(reportsDir, args) + if err != nil { + return errResult(err.Error()), nil + } + return textResult(out), nil + }) +} + +// renderBountyReport resolves the report directory, negotiates the variant, +// and renders it. Exposed for tests. +func renderBountyReport(reportsDir string, args bountyReportArgs) (string, error) { + if strings.TrimSpace(args.Name) == "" { + return "", errors.New("name is required") + } + if args.Namespace == "" { + args.Namespace = "hermes-obol-agent" + } + // The two path segments come from the caller — never let them escape the + // reports root. + for _, segment := range []string{args.Name, args.Namespace} { + if segment != filepath.Base(segment) || segment == ".." || segment == "." 
{ + return "", fmt.Errorf("invalid path segment %q", segment) + } + } + + dir := filepath.Join(reportsDir, args.Namespace, args.Name) + if info, err := os.Stat(dir); err != nil || !info.IsDir() { + return "", fmt.Errorf("no report found for %s/%s", args.Namespace, args.Name) + } + + t, err := resolveReportTaskType(dir, args.TaskType) + if err != nil { + return "", err + } + + variant, raw, err := negotiateReportVariant(dir, t, args.SupportedCatalogIDs) + if err != nil { + return "", err + } + + if variant.Kind == "mcp-app" { + node := map[string]any{ + "type": "custom", + "name": "McpApp", + "properties": map[string]any{ + "title": fmt.Sprintf("%s — %s report", args.Name, t.Ref()), + // decodeURIComponent-safe percent encoding (QueryEscape's '+' + // for space would corrupt the HTML on decode). + "content": "url_encoded:" + strings.ReplaceAll(url.QueryEscape(string(raw)), "+", "%20"), + }, + } + encoded, err := json.Marshal(node) + if err != nil { + return "", err + } + return string(encoded), nil + } + return string(raw), nil +} + +// resolveReportTaskType picks the task type: explicit arg > task.json sidecar > +// first enabled type with a variant surface present in dir. 
+func resolveReportTaskType(dir, explicit string) (bounty.TaskType, error) { + if explicit != "" { + return bounty.Resolve(explicit) + } + + if raw, err := os.ReadFile(filepath.Join(dir, "task.json")); err == nil { + var meta bountyReportMeta + if err := json.Unmarshal(raw, &meta); err == nil && meta.TypeRef != "" { + return bounty.Resolve(meta.TypeRef) + } + } + + enabled, err := bounty.Enabled() + if err != nil { + return bounty.TaskType{}, err + } + for _, t := range enabled { + for _, v := range t.Deliverable.Report.Variants { + if _, err := os.Stat(filepath.Join(dir, filepath.Base(v.Surface))); err == nil { + return t, nil + } + } + } + return bounty.TaskType{}, fmt.Errorf("cannot infer task type for %s (write a task.json sidecar or pass taskType)", dir) +} + +// negotiateReportVariant applies a2ui catalog negotiation: walk the caller's +// supportedCatalogIds in preference order, return the first variant that +// matches AND whose surface file exists. No ids → first variant present. 
+func negotiateReportVariant(dir string, t bounty.TaskType, supported []string) (bounty.ReportVariant, []byte, error) { + variants := t.Deliverable.Report.Variants + if len(variants) == 0 { + return bounty.ReportVariant{}, nil, fmt.Errorf("task type %s declares no report variants", t.Ref()) + } + + read := func(v bounty.ReportVariant) []byte { + raw, err := os.ReadFile(filepath.Join(dir, filepath.Base(v.Surface))) + if err != nil { + return nil + } + return raw + } + + if len(supported) > 0 { + for _, id := range supported { + for _, v := range variants { + if v.CatalogID == id { + if raw := read(v); raw != nil { + return v, raw, nil + } + } + } + } + return bounty.ReportVariant{}, nil, fmt.Errorf( + "no variant of %s matches supportedCatalogIds %v (available: %s)", t.Ref(), supported, variantCatalogs(variants)) + } + + for _, v := range variants { + if raw := read(v); raw != nil { + return v, raw, nil + } + } + return bounty.ReportVariant{}, nil, fmt.Errorf("no report files present in %s", dir) +} + +func variantCatalogs(variants []bounty.ReportVariant) string { + ids := make([]string, 0, len(variants)) + for _, v := range variants { + ids = append(ids, v.CatalogID) + } + return strings.Join(ids, ", ") +} diff --git a/internal/x402mcp/bountyreport_test.go b/internal/x402mcp/bountyreport_test.go new file mode 100644 index 00000000..8e24d772 --- /dev/null +++ b/internal/x402mcp/bountyreport_test.go @@ -0,0 +1,146 @@ +package x402mcp + +import ( + "encoding/json" + "net/url" + "os" + "path/filepath" + "strings" + "testing" +) + +const ( + basicCatalogID = "https://a2ui.org/specification/v1_0/catalogs/basic/catalog.json" + mcpAppCatalogID = "obol.org:mcp-app/v1" +) + +func writeReportFixture(t *testing.T) string { + t.Helper() + root := t.TempDir() + dir := filepath.Join(root, "hermes-obol-agent", "smoke-bench") + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatal(err) + } + files := map[string]string{ + "report.a2ui.json": `{"messages":[{"version":"v1.0"}]}`, 
+ "report.app.html": "score & verdict", + "task.json": `{"typeRef":"benchmark@v1"}`, + } + for name, content := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + return root +} + +func TestBountyReport_DeclarativeByDefault(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench"}) + if err != nil { + t.Fatalf("render: %v", err) + } + if !strings.Contains(out, `"version":"v1.0"`) { + t.Errorf("default render should be the raw declarative A2UI JSON, got %q", out) + } +} + +func TestBountyReport_NegotiatesMcpApp(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + SupportedCatalogIDs: []string{mcpAppCatalogID, basicCatalogID}, + }) + if err != nil { + t.Fatalf("render: %v", err) + } + + var node struct { + Type string `json:"type"` + Name string `json:"name"` + Properties struct { + Content string `json:"content"` + Title string `json:"title"` + } `json:"properties"` + } + if err := json.Unmarshal([]byte(out), &node); err != nil { + t.Fatalf("mcp-app render is not a JSON node: %v", err) + } + if node.Type != "custom" || node.Name != "McpApp" { + t.Errorf("node = %s/%s, want custom/McpApp", node.Type, node.Name) + } + if !strings.HasPrefix(node.Properties.Content, "url_encoded:") { + t.Fatalf("content must be url_encoded:-prefixed, got %q", node.Properties.Content[:20]) + } + decoded, err := url.QueryUnescape(strings.TrimPrefix(node.Properties.Content, "url_encoded:")) + if err != nil { + t.Fatalf("content does not decode: %v", err) + } + if decoded != "score & verdict" { + t.Errorf("decoded content = %q (encoding must be decodeURIComponent-safe)", decoded) + } +} + +func TestBountyReport_PrefersClientOrder(t *testing.T) { + root := writeReportFixture(t) + + out, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + 
SupportedCatalogIDs: []string{basicCatalogID, mcpAppCatalogID}, + }) + if err != nil { + t.Fatalf("render: %v", err) + } + if strings.Contains(out, "McpApp") { + t.Error("client preferred the basic catalog; declarative variant must win") + } +} + +func TestBountyReport_NoCatalogMatch(t *testing.T) { + root := writeReportFixture(t) + + _, err := renderBountyReport(root, bountyReportArgs{ + Name: "smoke-bench", + SupportedCatalogIDs: []string{"example.com:unknown/v9"}, + }) + if err == nil || !strings.Contains(err.Error(), "supportedCatalogIds") { + t.Errorf("no-match must error with the available catalogs, got %v", err) + } +} + +func TestBountyReport_InferenceWithoutSidecar(t *testing.T) { + root := writeReportFixture(t) + if err := os.Remove(filepath.Join(root, "hermes-obol-agent", "smoke-bench", "task.json")); err != nil { + t.Fatal(err) + } + + out, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench"}) + if err != nil { + t.Fatalf("inference from report files should work: %v", err) + } + if !strings.Contains(out, `"version":"v1.0"`) { + t.Errorf("unexpected render: %q", out) + } +} + +func TestBountyReport_RejectsPathTraversal(t *testing.T) { + root := writeReportFixture(t) + + for _, name := range []string{"../smoke-bench", "..", "a/b"} { + if _, err := renderBountyReport(root, bountyReportArgs{Name: name}); err == nil { + t.Errorf("name %q must be rejected (path traversal)", name) + } + } + if _, err := renderBountyReport(root, bountyReportArgs{Name: "smoke-bench", Namespace: "../hermes-obol-agent"}); err == nil { + t.Error("namespace traversal must be rejected") + } +} + +func TestBountyReport_MissingBounty(t *testing.T) { + root := writeReportFixture(t) + if _, err := renderBountyReport(root, bountyReportArgs{Name: "nonexistent"}); err == nil { + t.Error("missing report dir must error") + } +} diff --git a/internal/x402mcp/server.go b/internal/x402mcp/server.go index 255254fc..03d71fcd 100644 --- a/internal/x402mcp/server.go +++ 
b/internal/x402mcp/server.go @@ -56,6 +56,11 @@ type Options struct { FacilitatorURL string // x402 facilitator (verify/settle); caller supplies a default Upstream string // backend HTTP service the paid tool POSTs the buyer's JSON args to (e.g. a weather/data API) UpstreamHeaders map[string]string // optional auth headers for the backend (e.g. "X-Api-Key": ""); set server-side, never exposed to buyers + + // BountyReportsDir, when set, registers the free bounty_report tool + // serving ServiceBounty A2UI reports from + // {BountyReportsDir}/{namespace}/{name}/{surface-file}. + BountyReportsDir string } // Serve builds and runs the x402-paid MCP server in the foreground over @@ -121,6 +126,12 @@ func Serve(ctx context.Context, opts Options) error { return textResult("pong"), nil }) + // Free bounty-report tool (unwrapped — reports are gate:local in v1; the + // mcp-x402 gate wraps this same handler with the payment wrapper later). + if strings.TrimSpace(opts.BountyReportsDir) != "" { + AddBountyReportTool(server, opts.BountyReportsDir) + } + // Paid tool: forward the buyer's JSON arguments to the backend service and // return the response. The arg shape is the backend's own request body — // documented by the operator in opts.Description (e.g. 
a get_weather tool: diff --git a/justfile b/justfile index 802f66b4..95db1126 100644 --- a/justfile +++ b/justfile @@ -152,6 +152,8 @@ generate: purchaserequests) target="purchaserequest-crd.yaml" ;; registrationrequests) target="registrationrequest-crd.yaml" ;; serviceoffers) target="serviceoffer-crd.yaml" ;; + servicebounties) target="servicebounty-crd.yaml" ;; + evaluatorenrollments) target="evaluatorenrollment-crd.yaml" ;; *) target="${plural%s}-crd.yaml" ;; esac mv "$f" "$out/$target" diff --git a/plans/bounty-ane-marketplace-design.md b/plans/bounty-ane-marketplace-design.md new file mode 100644 index 00000000..a6d5ee78 --- /dev/null +++ b/plans/bounty-ane-marketplace-design.md @@ -0,0 +1,765 @@ +# Bounties: a demand-side marketplace for AI work on a distributed ANE fleet + +**Status:** Design / buildable brainstorm · **Owner:** Lead Architect · **Target:** obol-stack `obol.org/v1alpha1` + +> **Naming (locked):** the CRD Kind is **`ServiceBounty`** (plural `servicebounties`, short `sb`) so it sorts beside `ServiceOffer` in `kubectl get crds` and reads as its matched pair. The CLI verb stays **`obol bounty`** — Kind ≠ verb, exactly as `obol sell` creates a `ServiceOffer`. "Bounty" remains the human/CLI/domain concept (e.g. `BountyRunner`, `BountyEscrow.sol`); only the Kubernetes resource carries the `Service…` prefix. + +> ⚠️ **READ FIRST — must-fix corrections (from adversarial review, Appendix B).** The body below is the design exploration; these six corrections OVERRIDE it where they conflict: +> 1. **Payout does NOT reuse the buyer-sidecar.** `internal/x402/buyer/proxy.go` is a request-time `http.RoundTripper` that burns one voucher only when a *live* x402 upstream returns `<400` (`ConfirmSpend`, `signer.go:295`); money flows buyer→seller. A bounty needs escrow→fulfiller-on-verdict — the inverse. 
Honest v0 payout = a **coordinator agent** that, on `Verified`, submits one poster-pre-signed ERC-3009 voucher (`payTo`=fulfiller) to the facilitator `/settle` directly. That coordinator **is a trusted release authority** — say so; don't claim "trustless on shipped code." +> 2. **Agent RBAC is cluster-wide, not namespace-scoped.** `openclaw-monetize-write` is a ClusterRole+ClusterRoleBinding to both agent SAs. Put `servicebounties` in a **namespaced Role/RoleBinding**, or state the cluster-wide posture plainly. +> 3. **Remove `escrowRef.namespace` entirely** — force same-namespace by construction (a runtime string-compare guard is a future-refactor footgun given cluster-wide PurchaseRequest write). +> 4. **Cut the ANE/Ray worker substrate (§6) from v0.** Fulfillment is opaque: any process that emits a signed deliverable. ANE inference is real but niche (≤8B, 2–5× slower than the same Mac's GPU); ANE *training* is research PoC. See Appendix A. +> 5. **Hard invariant + test: a ServiceBounty NEVER creates an HTTPRoute/Middleware/Secret/Namespace.** The servicebounty-controller has zero route/secret creation capability; discovery rides only the existing `/skill.md` + `agent-registration.json`. Extend `internal/embed/embed_crd_test.go`. +> 6. **§5.3's stake-weighted verifier selection/slashing and the §9 v1/v2 stake/juror-committee roadmap are SUPERSEDED by §11 (evaluator market, 2026-06-10).** Post-scaffold design steer: no validator staking, no slashing — verification is an OBOL-paid evaluator market with a median-of-k quorum and a Shadow→Probation→Full reputation ladder, **on by default** with `--dangerously-skip-verification` as the explicit opt-out. Wherever the body says "stake-weighted", "slashing", "challenge bond", or "juror committee" for verification, read §11. + +--- + +## 1. 
Vision + +Someone on the timeline posted a public bounty — *"benchmark DeepSeek-V4-Flash on real hardware, $500"* — and a stranger ran it on their MacBook and got paid $500 USDT on Polygon. That transaction happened on Twitter and a block explorer, not on a marketplace, because no marketplace for it exists. obol-stack already ships the **seller half** of exactly this economy: a `ServiceOffer` declares "I will serve work for pay," the controller publishes an x402-gated route (`internal/serviceoffercontroller/controller.go:528-532`), and the buyer sidecar settles ERC-3009 vouchers after success (`internal/x402/buyer/`). What's missing is the **buyer-initiated half**: a way to *post demand* — "here is money, here is the work, here is how I'll know it's done." This document specifies the **ServiceBounty**: the structural inverse of a ServiceOffer that turns obol-stack into a two-sided marketplace, with a distributed Apple-silicon fleet (GPU-first, ANE where it honestly helps) as the execution substrate. + +--- + +## 2. 
Core insight: the ServiceBounty is the inverse of a ServiceOffer + +A ServiceOffer and a ServiceBounty are the two halves of one market, mirror-imaged on every axis: + +| Axis | `ServiceOffer` (supply) | `ServiceBounty` (demand) | +|---|---|---| +| Who initiates payment | Buyer, at request time | **Poster, up front, escrowed** | +| Money direction | buyer → `payment.payTo` (the seller) | escrow → fulfiller (`payTo` filled at claim) | +| Terminal state | a route that **stays up** serving traffic (`Ready`, controller.go:528-532) | a resource that **settles and closes** (`Paid`/`Refunded`) | +| Work latency | milliseconds | minutes → hours → days | +| Acceptance signal | HTTP `<400` from upstream | a **verifier judgement on a deliverable** | +| Counterparties | 1 buyer ↔ 1 seller | 1 poster ↔ **N fulfillers** (race / split / redundant) | +| Side-effect rail | Middleware + HTTPRoute (controller.go:660/:695) | escrow `PurchaseRequest` payout (types.go:565) | +| Time-box | `DrainAt` graceful teardown (types.go:142) | `deadline` → expiry → refund | +| Sibling CR | `RegistrationRequest` (on-chain side effect, controller.go:802) | `PurchaseRequest` (escrow side effect, types.go:536) | +| Trust vocab | `registration.supportedTrust[]` (types.go:320) | `acceptance.verifier` (same enum, repurposed) | + +**Crisp statement:** *A ServiceOffer is standing supply that converges to one live route and stays up. A ServiceBounty is time-boxed demand that converges to one paid deliverable and closes. They are the same state-machine skeleton run in opposite directions, sharing the same money rail (x402/ERC-3009), the same identity rail (ERC-8004), and the same controller plumbing — and together they are a complete marketplace.* + +This symmetry is the spine of the whole design. Everywhere a perspective proposed new machinery, I checked whether the *inverse* of existing ServiceOffer machinery already does the job. Usually it does. + +--- + +## 3. 
Data model — the `ServiceBounty` CRD + +**The call: a new top-level `ServiceBounty` CRD in `obol.org/v1alpha1`, plus a co-located `servicebounty-controller` in the existing `serviceoffer-controller` binary, reusing `PurchaseRequest` as the escrow/payout primitive.** + +### 3.1 Why a new CRD and not `type=bounty` or a generic `WorkRequest` + +I considered three designs and reject two: + +- **Rejected: `ServiceOffer.type=bounty`.** ServiceOffer's entire reconcile loop converges toward *keeping an HTTPRoute + Traefik Middleware live* (`reconcilePaymentGate` controller.go:660, `reconcileRoute` controller.go:695). A bounty has **no upstream Service to route to** and **inverts the meaning of `payment.payTo`** (seller-receives → escrow-pays-out). Overloading the enum forces every consumer — the verifier's `serviceoffer_source`, `/skill.md`, `obol sell list/status`, the `IsInference()/IsAgent()` helpers — to learn a type with no route, no upstream, and reversed money flow. Large blast radius for a leaky abstraction. +- **Rejected: a generic `WorkRequest`.** Too abstract to validate. The whole value of a CRD is that `kubectl`/the API server enforce a schema; a `map[string]any` task blob defers all validation to the controller and loses the per-type acceptance gates that make verification possible. +- **Chosen: a dedicated `ServiceBounty` CRD.** It mirrors the ServiceOffer shape exactly (group `obol.org`, version `v1alpha1`, status conditions, finalizers), so it inherits the codebase's conventions and RBAC posture, while keeping the demand-side lifecycle (`Open → Claimed → … → Paid`, with `Disputed/Expired/Refunded`) cleanly separate from the route-publication loop. This is the *same architectural precedent the codebase already chose* for `RegistrationRequest`: a sibling CR + a sibling reconcile pass in the same binary (controller.go:524, :802), isolating a side-effecting concern from the main loop. 
+ +The unimplemented `fine-tuning` ServiceOffer enum value (types.go:105) is the **supply-side dual** ("I sell fine-tuning capacity") of a demand-side `fine-tune` bounty. I leave that enum untouched and make `ServiceBounty.spec.task.type` include `fine-tune`, so the two meet in the middle without entangling. + +### 3.2 Field table + +`ServiceBounty` is **namespaced** (mirrors ServiceOffer, inherits per-namespace RBAC). Register `ServiceBountyKind = "ServiceBounty"`, `ServiceBountyResource = "servicebounties"`, `ServiceBountyGVR` next to the existing GVRs (types.go:48-67) — **plural `servicebounties`, singular `servicebounty`, shortName `sb`**, so it sorts beside `serviceoffers`. + +| Field | Type | Reuses (file:line) | Notes | +|---|---|---|---| +| `spec.task.type` | enum `benchmark\|fine-tune\|serve\|http\|generic` | `ServiceOfferSpec.Type` (types.go:105) | `fine-tune` deliberately mirrors the unimplemented `fine-tuning` supply hook. | +| `spec.task.runner` | string | new | `BountyRunner` plugin id (§6), e.g. `mlx-lora`, `anemll-serve`. Opaque to the controller. | +| `spec.task.requires[]` | `[]string` | new | Capability tags the fulfiller node must advertise (e.g. `serve.ane`, `finetune.mlx`). Matched at claim. | +| `spec.task.targetModel` | `{name, runtime}` | `ServiceOfferModel` (types.go:166-174) | Reused verbatim. Runtime enum `ollama\|vllm\|tgi`. | +| `spec.task.datasetRef` | `{uri, hash, format}` | new | Content-addressed dataset pointer; hash makes verification deterministic. | +| `spec.task.harnessRef` | `{name, uri, version}` | new | Pinned eval harness / trainer image, content-addressed. | +| `spec.task.params` | `map[string]string` | `ServiceOfferSpec.Provenance` shape (types.go:129) | Free-form knobs (`epochs`, `lr`, `seqlen`, `tasks`). Keeps schema stable across task types. | +| `spec.acceptance.criteria[]` | `[]{metric, op, threshold, weight}` | new | Machine-checkable gates. The bounty's *raison d'être*. 
| +| `spec.acceptance.verifier` | enum `self-attested\|harness-rerun\|tee-attestation\|consensus\|poster-manual` | `supportedTrust[]` vocab (types.go:320) | How a submission is checked (§5). | +| `spec.acceptance.deliverableSchema` | `{artifacts[]{name,kind,required}, resultHashRequired}` | new | Declares a valid submission's contents. | +| `spec.reward` | `ServiceOfferPayment` | `ServiceOfferPayment` (types.go:211-247) | **Reused whole.** `method=crypto\|card` (#608), `network`, `asset`, `card{...}`, `price`. `payTo` here = escrow-return address. | +| `spec.reward.price.perRequest` | string | `PriceTable.PerRequest` (types.go:299) | The flat lump-sum reward (the "$500"). | +| `spec.reward.price.perEpoch` | string | `PriceTable.PerEpoch` (types.go:305) | Milestone/staged payout for fine-tunes. | +| `spec.escrowRef` | `{name, namespace}` → `PurchaseRequest` | `PurchaseRequest` (types.go:536) + `AgentRef` shape (types.go:159-164) | Poster's pre-signed reward auths (`PreSignedAuths[]`, types.go:565). **Confused-deputy guard: namespace MUST equal bounty namespace** (copy agent_resolver.go:46). | +| `spec.deadline` | `*metav1.Time` | `DrainAt` pattern (types.go:142) | Past deadline + no `Verified` → `Expired` → `Refunded`. Reuse requeue-at-expiry logic. | +| `spec.claimGracePeriod` | `*metav1.Duration` | `DrainGracePeriod` (types.go:148) | How long a `Claimed` fulfiller has before the claim lapses and the bounty re-opens. | +| `spec.maxFulfillers` | `int64` (default 1) | new | `1` = single-winner; `>1` = first-N-valid paid (split/redundant). | +| `spec.firstValidWins` | `bool` (default true) | new | First submission passing `acceptance` is auto-paid; controller stops accepting claims. | +| `spec.bond` | `{required, amount, token}` | new | Fulfiller anti-griefing stake (§4, §5). | +| `spec.registration` | `ServiceOfferRegistration` | `ServiceOfferRegistration` (types.go:308-333) | Optional ERC-8004 publication of the bounty as **discoverable demand**. 
| +| `spec.provenance` | `map[string]string` | `ServiceOfferSpec.Provenance` (types.go:129) | Why this bounty exists. | + +**Status** reuses the shared `Condition` type and the `isConditionTrue` AND-rollup idiom (controller.go:528-532): + +```go +type ServiceBountyStatus struct { + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + Phase string `json:"phase,omitempty"` // human rollup, like AgentStatus.Phase (types.go:718) + Conditions []Condition `json:"conditions,omitempty"` // shared type + Claims []ServiceBountyClaim `json:"claims,omitempty"` // observed fulfiller bindings + EscrowFunded bool `json:"escrowFunded,omitempty"` + EscrowRemaining string `json:"escrowRemaining,omitempty"`// mirrors PurchaseRequest.Status.Remaining (types.go:638) + WinningClaim string `json:"winningClaim,omitempty"` + PayoutTxHash string `json:"payoutTxHash,omitempty"` // like RegistrationTxHash (types.go:355) + RefundTxHash string `json:"refundTxHash,omitempty"` +} +``` + +Claims are *observed* facts → they live in `status.claims[]`, not spec (a separate `Claim` CRD over-engineers the common single-winner case). Each `ServiceBountyClaim` binds `{fulfillerAddress, fulfillerAgentRef, claimedAt, submission{artifacts,resultHash,metrics,submittedAt}, phase, payoutRef}`. + +**Lifecycle** (machine truth = condition set; `phase` is the human rollup): + +``` + ┌─────────────► Expired ──► Refunded + │ (deadline, no Verified) +Open ──► Claimed ──► InProgress ──► Submitted ──► Verified ──► Paid + ▲ │ │ │ + └─────────┘ (claimGracePeriod lapses) └─► Rejected └─► Disputed ──► (Verified | Refunded) +``` + +Condition set, each mirroring an inverse ServiceOffer condition: `EscrowFunded` (inverse of `PaymentGateReady`), `Open`, `Claimed`, `Submitted`, `Verified` (the core gate), `Paid` (inverse of `Registered`). 
The `done` rollup: + +```go +done := isConditionTrue(status,"Verified") && isConditionTrue(status,"Paid") // mirrors controller.go:528-532 +``` + +### 3.3 Three example YAMLs + +**(a) Benchmark — the motivating $500 case** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: bench-deepseek-v4-flash, namespace: hermes-obol-agent } +spec: + task: + type: benchmark + runner: bench + requires: ["benchmark"] + targetModel: { name: "deepseek-v4-flash", runtime: vllm } # ServiceOfferModel, types.go:166 + harnessRef: { name: lm-eval-harness, uri: "ghcr.io/eleutherai/lm-eval-harness", version: v0.4.3 } + params: { tasks: "mmlu,gsm8k,humaneval", hardwareClass: "M4-Max-40c-128g", seed: "1234", dtype: fp16 } + acceptance: + criteria: + - { metric: mmlu, op: ">=", threshold: "0.0", weight: 1 } # report-only; eval SCORE is the verifiable gold case + verifier: consensus # N-of-M re-run on committed dataset (§5) + deliverableSchema: + resultHashRequired: true + artifacts: + - { name: results.json, kind: eval-report, required: true } + - { name: run.manifest, kind: provenance, required: true } # signed run-manifest (§5.0) + reward: # ServiceOfferPayment, types.go:211 + method: crypto + network: base + payTo: "0xPOSTER...aaaa" # escrow-return addr + asset: { symbol: USDT, decimals: 6, transferMethod: eip3009 } + price: { perRequest: "500.00" } # the $500 lump sum + escrowRef: { name: bench-deepseek-escrow, namespace: hermes-obol-agent } # PurchaseRequest, types.go:536 + deadline: "2026-07-01T00:00:00Z" # DrainAt pattern, types.go:142 + claimGracePeriod: "72h" + maxFulfillers: 1 + firstValidWins: true + bond: { required: true, amount: "750.00", token: USDT } # 1.5x → lying is -EV + registration: + enabled: true + name: "Benchmark DeepSeek-V4-Flash" + skills: ["evaluation/benchmarking"] + supportedTrust: ["reputation"] +``` + +**(b) Fine-tune — staged, pay-per-epoch** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: 
ft-qwen-coder, namespace: hermes-obol-agent } +spec: + task: + type: fine-tune # mirrors the unimplemented supply hook, types.go:105 + runner: mlx-lora # MLX GPU trainer (NOT ane-train; see §6) + requires: ["finetune.mlx"] + targetModel: { name: "qwen3.5:9b", runtime: vllm } + datasetRef: { uri: "ipfs://bafy.../sql-pairs-v2.jsonl", hash: "sha256:9f2c...", format: jsonl } + harnessRef: { name: mlx-lm.lora, uri: "ghcr.io/obol/mlx-tune", version: 0.6.0 } + params: { epochs: "3", lr: "1e-4", loraRank: "32", seqlen: "4096" } + acceptance: + criteria: + - { metric: sql_exec_acc, op: ">=", threshold: "0.78", weight: 3 } # held-out execution accuracy + - { metric: eval_loss, op: "<=", threshold: "0.85", weight: 2 } + verifier: harness-rerun # held-out re-eval on committed checkpoint hash (§5) + deliverableSchema: + resultHashRequired: true + artifacts: + - { name: adapter.safetensors, kind: weights, required: true } + - { name: eval.json, kind: eval-report, required: true } + reward: + method: crypto + network: base-sepolia + payTo: "0xPOSTER...bbbb" + asset: { symbol: USDC, decimals: 6, transferMethod: eip3009 } + price: { perEpoch: "40.00", perRequest: "120.00" } # PerEpoch staged (types.go:305) + 120 on final pass + escrowRef: { name: ft-qwen-escrow, namespace: hermes-obol-agent } + deadline: "2026-06-20T00:00:00Z" + claimGracePeriod: "168h" + maxFulfillers: 1 + firstValidWins: false # poster reviews before final release + bond: { required: true, amount: "200.00", token: USDC } +``` + +**(c) Serve — keep ComfyUI up, pay with a credit card (MPP #608)** + +```yaml +apiVersion: obol.org/v1alpha1 +kind: ServiceBounty +metadata: { name: host-comfyui-sdxl, namespace: hermes-obol-agent } +spec: + task: + type: serve + runner: comfyui + requires: ["render"] + targetModel: { name: "sdxl-comfyui", runtime: tgi } + harnessRef: { name: comfyui, uri: "ghcr.io/comfyanonymous/comfyui", version: v0.3 } + params: { workflow: "txt2img-sdxl", endpoint_kind: openai-compat, uptime_window: 
30d } + acceptance: + criteria: + - { metric: uptime_pct, op: ">=", threshold: "99.5", weight: 3 } # Prometheus SLA, automatic + - { metric: p95_latency_ms, op: "<=", threshold: "4000", weight: 2 } + verifier: tee-attestation # enclave-bound device identity (§5) + deliverableSchema: + resultHashRequired: false + artifacts: + - { name: served-endpoint, kind: http-endpoint, required: true } # a live URL, monitored + - { name: attestation.json, kind: tee-quote, required: true } + reward: + method: card # MPP credit-card, #608, types.go:216 + card: { provider: stripe, account: "acct_1ObolHostExample", currency: usd } + price: { perHour: "0.50", perRequest: "300.00" } # PerHour for serving window (types.go:303) + escrowRef: { name: host-comfyui-escrow, namespace: hermes-obol-agent } # Stripe manual-capture hold (§4) + deadline: "2026-08-01T00:00:00Z" + claimGracePeriod: "24h" + maxFulfillers: 3 # up to 3 redundant hosts paid + firstValidWins: true +``` + +--- + +## 4. Escrow & payment + +**The invariant that survives every phase: the controller never holds keys.** This is already enforced — the controller's only secret access is `secretRef` plumbing, and the agent-resolver confused-deputy guard (agent_resolver.go:46) exists precisely to stop it brokering credentials it shouldn't. All signing lives in agent wallets / remote-signer `:9000` (`internal/openclaw/wallet.go`) / Secure Enclave (`internal/enclave/enclave_darwin.go`). The controller is declarative: it watches `ServiceBounty`/`PurchaseRequest`, drives the state machine, and **observes** tx hashes it never produces. We keep this absolute. + +The hard problem is the **temporal gap**. The shipped x402 path is a request-time micropayment: work completes in milliseconds, so the voucher *is* the conditional release and no custody is needed. A bounty inverts this — funds must commit *up front* and release *hours or days later* on a deliverable. That gap is what forces an escrow design. 
+ +### 4.1 The call: MVP = conditional-voucher escrow; end-state = on-chain `BountyEscrow` contract — and settlement is pluggable + +I evaluated three options. Resolving the disagreement between Perspectives A and B (A reuses `PurchaseRequest` as-is; B notes a bare ERC-3009 voucher has *no native condition*): + +- **Option 1 — On-chain `BountyEscrow.sol`.** Poster `lock()`s USDC/OBOL into a contract; release on a verifier EIP-712 signature; refund on timeout; native milestones + bond/slash. **Real custody, trust-minimized — but needs an audited contract per chain.** Cannot directly hold card funds. +- **Option 2 — Pre-signed conditional ERC-3009 voucher held by a coordinator agent.** Reuses `PurchaseRequest.PreSignedAuths[]` (types.go:565) verbatim. **Ships this week on existing code, zero new contracts.** *Honest limit B surfaced and A glossed:* an ERC-3009 voucher is a bearer instrument valid for its whole `validBefore` window — it has no on-chain condition. So the coordinator agent is a *de facto* custodian of the release *decision* (never of the funds-bearing key), and refund = the poster calling `cancelAuthorization(nonce)`. The poster's balance is not actually reserved. **This is escrow theater for trust-minimization — acceptable for low-value / reputation-gated pairs, fenced by a bond + value cap.** +- **Option 3 — x402-as-settlement (deliverable-as-a-sale).** The fulfiller "sells" the verified deliverable as a ServiceOffer; the poster "buys" it through the buyer sidecar. **Zero new payment code — but it provides no lock leg at all** (the fulfiller works on a promise). It is a *settlement rail*, not an escrow. + +**Decision:** +- **MVP = Option 2** for the lock, **gated to low-value / reputation-vetted fulfillers**, so we ship on shipped code. 
+- **End-state = Option 1** for custody + native milestones + slashing, with **Option 3 as the release rail** (the payout txn can be modeled as the poster buying the deliverable), and **ERC-8004 reputation progressively replacing escrow** as trust accrues. + +Critically, **the CRD surface is identical across phases** — only the *settlement adapter* swaps (`voucherAdapter` → `escrowContractAdapter` → `cardAuthAdapter`). This mirrors exactly how MPP #608 made payment *methods* pluggable (`Method: crypto|card`, types.go:216). ServiceBounty settlement becomes a fourth pluggable rail: `voucher | escrow | sale | cardAuth`. **One switch, four rails, one invariant: the controller never signs.** + +### 4.2 Who signs what (no-signer invariant, made explicit) + +| Action | Signer | Where | +|---|---|---| +| `lock` / voucher pre-sign | **Poster's agent wallet** | remote-signer `:9000`, poster ns (wallet.go) | +| `release` verifier signature | **Verifier agent / oracle** | its own wallet, or **Secure Enclave** for attestable trust (enclave_darwin.go) | +| voucher submission / `release()` call | **Coordinator agent** (MVP) or any submitter holding the verifier sig (contract verifies EIP-712) | agent ns; submitter is untrusted in the contract case | +| Fulfiller bond | **Fulfiller's agent wallet** | fulfiller ns remote-signer | + +### 4.3 Milestone / per-epoch release + +`PriceTable.PerEpoch` already exists (types.go:305, marked "Fine-tuning only") — bounties finally exercise it. **One milestone = one epoch = one release tranche.** + +- **MVP:** poster pre-signs **one voucher per epoch** into the escrow `PurchaseRequest` (this is the *exact* N-auth fan-out the buyer sidecar already does, `PreSignedAuths[]`). Verifier signs off on epoch *k*'s checkpoint → coordinator submits voucher *k*. Refund = cancel the unspent epochs' nonces. +- **End-state:** `release(id, fraction)` callable per milestone; contract tracks `releasedFraction`. 
+ +For a 5-epoch fine-tune at `perEpoch: 40`, a fulfiller who completes 3/5 and then fails keeps 60% — incentive-aligned, and **poster loss is bounded to one unverified epoch**. This is the same bounded-loss discipline the buyer sidecar already enforces (`max loss = N × price`, `internal/x402/buyer/`). + +### 4.4 Fee, bond, payout + +- **Platform fee** — `feeBps` + `feeRecipient`. Contract: deducted atomically in `release()`. Voucher-MVP: a *second* pre-signed voucher poster→`feeRecipient` submitted alongside. Card: Stripe `application_fee_amount`. +- **Fulfiller bond** (anti-griefing) — `spec.bond`, staked before `Claimed`, returned on accepted proof or honest timeout, **slashed** to `feeRecipient`/poster on bad-faith submission. Bond ≥ verifier's marginal verification cost so spamming is never profitable. Sized so `bond × P(detected) > reward` → lying is always −EV. +- **Payout token** — reuses `ServiceOfferPayment.Method`/`Asset`: `eip3009` (USDC), `permit2` (OBOL), or `Method: card`. **Card rewards cannot fund an on-chain escrow**, so for `reward.method: card` the lock is a **Stripe manual-capture `PaymentIntent`** (`capture_method: manual`): authorize up front (lock), capture on accepted proof (release), cancel on timeout (refund) — the off-chain mirror of `cancelAuthorization`, slotting into the same rail switch as `cardAuth`. + +### 4.5 Reconcile loop extension + +Add a sibling `reconcileServiceBounty` pass to `cmd/serviceoffer-controller/main.go`, cloned structurally from `reconcileOffer` (controller.go:386-559) — *not* an extension of it, following the `RegistrationRequest` precedent (one binary, now three controllers): + +1. **Finalizer + decode** — identical to controller.go:400-428; on delete, refund escrow (tombstone cleanup). +2. **`reconcileEscrow`** (replaces `reconcileUpstream`) — resolve `escrowRef`, apply the confused-deputy guard `escrowRef.Namespace == bounty.Namespace` copied from agent_resolver.go:46. 
Set `EscrowFunded` when `status.remaining > 0`. +3. **`reconcileClaims`** (replaces gate/route) — **no Middleware, no HTTPRoute**; admit claims up to `maxFulfillers`, lapse stale claims past `claimGracePeriod` using the `DrainEndsAt`/`DrainExpired` time math (types.go:498-519) + requeue-at-expiry. +4. **`reconcileVerification`** (new, the core) — run `acceptance.verifier` per submitted claim (§5). +5. **`reconcilePayout`** (replaces `reconcileRegistrationStatus`) — on `Verified`, trigger the existing buyer-sidecar settlement path against the escrow `PurchaseRequest`; for `card`, route through `internal/x402/card.go`. Set `Paid` + `PayoutTxHash`. +6. **Rollup** — `done := isConditionTrue("Verified") && isConditionTrue("Paid")`; on deadline-past-no-Verified → `Expired` → `reconcileRefund` → `Refunded`. + +--- + +## 5. Verification — from trusted coordinator to trust-minimized consensus + +**The honest premise up front: most ML deliverables are not cryptographically verifiable.** You cannot prove a tok/s number with a SNARK or that a model "is good" with a hash. What you *can* do is make cheating **expensive, attributable, and slashable** — reducing the trusted surface from "trust a person" to "trust a hash + a quorum + a bond." + +### 5.0 Shared primitives (all bounty types) + +- **Commit–reveal.** Fulfiller posts `H = hash(deliverable ‖ manifest ‖ salt)` *before* escrow logic runs, then reveals. Defeats "report a good number, ship a different model." Costs one 32-byte commitment. +- **Signed run-manifest.** Every deliverable ships `{datasetCommit, modelHash, harnessCommit, seed, params, hardwareClass, result, resultHash, fulfillerSig, enclaveSig?}`, signed by the fulfiller's ERC-8004 agent wallet. A bare "47 tok/s" is unfalsifiable; the manifest makes it **re-runnable**, which is the whole game. +- **Optimistic-by-default with a bonded challenge window**; pessimistic N-of-M consensus only for a low-reputation agent's first job or above a value threshold. 
**Reputation (ERC-8004) is the throttle** — it sets the verification tax: high-rep fulfillers clear with a short window and no upfront re-run; new agents are fully re-verified. This is a *policy dial*, not a protocol fork. + +### 5.1 Per-type verification + +| Type | What's verifiable | Mechanism | Honest limit | +|---|---|---|---| +| **Benchmark — eval *score*** | ✅ Strongly | Deterministic re-run on a *committed* held-out dataset (root committed at creation, rows revealed post-commit so they can't be trained on) + pinned harness + greedy/seed decode → **agreement within ε on the rounded score** (not bit-exact logits), N-of-M consensus on the rounded `resultHash`. **The flagship MVP case.** | Floating-point nondeterminism across GPUs → consensus on rounded scalar, never raw logits. | +| **Benchmark — tok/s** | ⚠️ Hardware-relative | Bind every claim to a `hardwareClass`; **reference-task calibration** (`normalized = claimed / referenceTokPerSec`) neutralizes silicon-lottery; verifier re-runs on *same-class* hardware and checks `verified ≥ claimed × (1−tol)`. Verifiable as **lower-bound + comparative ranking**, never a portable absolute. | Needs same-class verifiers in the pool. Frame as "verified ≥ claimed on declared class." | +| **Benchmark — tok/s/W** | ❌ Trust-only | — | No remote wattmeter attestation. Reputation + spot-audit only. Directly bites ANE "47–62 tok/s @ 2W" claims: throughput checkable, watts not. | +| **Fine-tune — checkpoint** | ✅ Strongly | Commit `modelHash` (no bait-and-switch), then **held-out eval re-run** on the committed checkpoint against `criteria` thresholds. Verification is **inference-only and orders of magnitude cheaper than the fine-tune itself** → optimistic verification is viable (an honest challenger can always afford to call a bluff). | **Never re-train** (non-deterministic, prohibitive). 
Data-contamination ("trained on test") is reputation/audit, not crypto — mitigate with a rotating never-revealed gold subset + reputation decay. | +| **Serving — SLA** | ✅ Automatic | **Reuse the deployed PodMonitor → Prometheus** (`internal/embed/infrastructure/base/templates/x402.yaml`, `llm.yaml`): liveness probes, quality canaries (known-good prompts vs committed reference), p50/p95/error-rate vs SLA. **Real paid x402 traffic doubles as liveness+quality proof** — a successful paid request *is* a datapoint. The most trust-minimizable type: machines decide payout, not people. Epoch payout via `price.perEpoch`/`perHour`; `drainAt`/`drainGracePeriod` give graceful teardown. | Graded/open-ended output quality isn't machine-verifiable → buyer dispute + reputation. Latency claims are probe-vantage-dependent → pin probe locations. | + +### 5.2 TEE / Secure Enclave — what it does and does NOT buy you + +The stack has **real** Secure Enclave signing (`enclave_darwin.go:330`, `Key.Sign(digest)` over P-256, hardware-bound, non-exportable, SIP-checked). Resolving a temptation across perspectives: **this is device/identity attestation, not computation attestation.** + +- ✅ It proves: "this result was signed by a key that physically lives in *a* Secure Enclave and never left the chip." Strong **sybil-resistance + device-binding** (one enclave key = one device). +- ❌ It does NOT prove the *computation* (the inference, the tok/s) ran in a TEE. The ANE/GPU compute runs *outside* any TEE; there is **no macOS TEE that attests an LLM forward pass.** + +**Correct use:** the enclave signature is a **reputation multiplier and challenge-window reducer** (more expensive to fake at scale because you need real distinct devices), *not* an oracle. **Don't claim TEE-verified inference.** + +### 5.3 Collusion & the oracle problem + +> **Superseded by §11** where this subsection leans on stake-weighting or slashing. 
The layered-defense framing survives; the *levers* changed (reputation ladder + random assignment + commit-reveal + escalation, not stake). + +Even with re-run + consensus: who watches the watchers? Layered defenses, none sufficient alone: + +1. **VRF-sampled, stake- and reputation-weighted verifier selection** *after* the result is committed — the fulfiller can't pre-select friendly verifiers. +2. **ERC-8004 reputation + stake** (`OnChainReg.AgentID`, ERC-721) — verifiers overturned by challenge/audit are slashed and lose reputation; sybils with no history carry near-zero weight. +3. **Enclave-bound verifier identity** — sybil farms now cost real hardware per identity. +4. **Disagreement → escalation, not blind majority** — escalate to a larger fresh pessimistic panel; collusion must win *every* escalation while the cost of being caught (full bond) dominates. +5. **Poster-as-oracle (MVP) → stake-weighted juror committee (v2)** for non-deterministic deliverables. + +**Honest floor (said out loud to users):** for deterministic deliverables (eval scores, checkpoint held-out re-run, SLA metrics) the oracle *is the re-run* — trust-minimizable. For non-deterministic deliverables (subjective quality, "is this a good fine-tune") **there is no cryptographic oracle** — you are buying a stake-weighted, slashable human/committee judgment. Power/watt, absolute cross-hardware tok/s, and "didn't train on the test set" rest permanently on reputation + stake + audit. + +--- + +## 6. ANE execution substrate + +**The honest framing, baked into the design (not an afterthought):** the verified ANE landscape says ANE = Core ML/ANEMLL **inference** for ≤8B models at ≤4K context, ~2–5× *slower* than the same Mac's GPU but ~10× more power-efficient. 
**No mainstream runtime (MLX, llama.cpp, vLLM, Ollama) dispatches LLM matmul to the ANE — they all run on the Metal GPU.** ANE *training* is reverse-engineering research only (maderix/ANE, Orion — PoC at 5–9% of peak, "does NOT replace GPU training"). Nobody clusters ANEs; real Mac fleets (exo) shard across the GPU/MLX. Ray multi-node on macOS is officially unsupported (Linux-only). + +So the fabric is, honestly named: **"distributed Mac *GPU* inference with optional per-node ANE for low-power small-model inference."** It advertises three capability classes and dispatches each to the substrate the research says actually works: + +| ServiceBounty class | Real substrate (today) | ANE role | Pluggable future | +|---|---|---|---| +| **serve** | MLX-GPU (`vllm-metal`/`llama.cpp`) for throughput; **ANEMLL→ANE** for ≤8B/≤4K low-power | ANE *is real here* for battery-bound nodes | — | +| **fine-tune** | **MLX GPU** (`mlx-lm.lora`/`mlx-tune`) | ANE only to *eval* checkpoints | `ane-train` (Orion) behind `OBOL_EXPERIMENTAL_ANE_TRAIN=1`, default OFF | +| **benchmark** | whatever engine the bounty names | ANE as a *measured target* (report ANE tok/s honestly: ~19 TFLOPS FP16, never "38 TOPS INT8" or "16×") | — | + +The fabric **never claims ANE training.** A fine-tune bounty demanding `task.requires: ["finetune.ane"]` is **rejected at claim time** unless the node opted into the experimental gate. + +### 6.1 Where Ray runs — host-side, NOT in-cluster (the load-bearing decision) + +**The ANE and Metal GPU are only reachable from host processes.** k3d nodes are Linux containers with no ANE, no Metal, no Core ML. Putting Ray workers in-cluster strands them on Linux with neither accelerator. obol-stack *already* solves this exact seam: the standalone inference gateway (`internal/inference/gateway.go`) and the Secure Enclave signer (`enclave_darwin.go`) **run on the Mac host**, and the cluster reaches them via `host.k3d.internal`. We reuse it. 
Because **Ray multi-node on macOS is unsupported**, the **Ray head runs on Linux** (a small k3d pod) while **Mac nodes run host-side Ray worker processes** that join it — the recommended "Ray-head-on-Linux + Mac workers" pattern.
+ +**Capability is measured, not declared** (every Mac *claims* an ANE). A one-time onboarding probe writes a `WorkerProfile` into the existing ERC-8004 `Metadata`/`Provenance` maps (types.go:249-274; published via the `RegistrationRequest` path, controller.go:802) — measured per-engine tok/s, chip, RAM, cached model inventory, context ceiling, and a `capabilities[]` list (`serve.ane`, `serve.gpu`, `finetune.mlx`, `benchmark`, `render`). **`finetune.ane` is deliberately absent** unless the experimental gate is on. A node that lies (claims `serve.ane`, has no ANE) fails the deterministic benchmark gate (§5) and loses reputation. No new CRD field — capability rides the free-form metadata maps. + +### 6.3 The `BountyRunner` plugin interface + +New task types must drop in **without touching the controller or the `ServiceBounty` CRD**. The controller only ever sees an opaque `spec.task.runner` + `spec.task` blob, a verifiable `Proof`, and a settlement trigger. All task semantics live host-side behind a `BountyRunner` interface keyed by `spec.task.runner`: + +```go +// internal/worker/runner.go (host-side; controller never imports this) +type BountyRunner interface { + ID() string // matches ServiceBounty.spec.task.runner + Capabilities() []Capability // must intersect spec.task.requires + Validate(spec ServiceBountySpec, node WorkerProfile) error // ANE limits enforced HERE: + // serve.ane rejects params>8B or ctx>4K; finetune rejects ane-train unless gated + Resolve(ctx, spec) (ResolvedInputs, error) // pull content-addressed model/dataset + Run(ctx, in, progress chan<- ProgressEvent) (outputs map[string]string, error) // streams 1→n + Prove(in, outputs, sign Signer) (Proof, error) // controller verifies generically +} + +register(MLXServeRunner{}) // serve.gpu +register(ANEMLLServeRunner{}) // serve.ane — ≤8B / ≤4K only +register(MLXLoRARunner{}) // finetune.mlx — GPU +register(BenchmarkRunner{}) // benchmark — doubles as the anti-lying gate (§5) 
+register(ComfyRenderRunner{}) // render — wraps ComfyUI, exposed via `obol sell http` +if experimentalANETrain { register(OrionANETrainRunner{}) } // finetune.ane — gated, OFF by default +``` + +`Run` streams per-step `{step, loss, tok_s, etaSec}` over **the SSE flush seam that already exists** (`x402-verifier.HandleProxy` flushes per-write; `statusRecorder.Flush` must forward to the underlying `http.Flusher`, `internal/x402/verifier.go`, regression `TestVerifier_HandleProxy_StreamsSSEChunks`). This is not cosmetic: a 500-step job streams keepalive progress so it survives the Cloudflare quick-tunnel ~100s idle ceiling — the exact reason CLAUDE.md prefers `stream: true`. + +Adding RL, eval, or embeddings = write one `BountyRunner`, register it, advertise its `Capabilities()`. CRD, controller, x402 settlement, ERC-8004 — untouched. This is the same polymorphism the controller already uses for `ServiceOffer.Type` (the `agent` resolver synthesizes upstream without the rest of the pipeline branching, agent_resolver.go:33). **The ANE-training gate is the whole modularity payoff:** if Orion ever graduates, flip the env flag, `finetune.ane` appears, `ane-train` bounties start matching — with no controller or CRD change. Until then it's vapor and the fabric correctly refuses to schedule it. + +### 6.4 One Mac, end-to-end (no Ray cluster needed) + +1. `obol stack up` (k3d + controllers + x402 + LiteLLM, as today). +2. `obol agent new worker-x --runtime worker --create-wallet` → one Agent CR + wallet. +3. Runner probes the box, publishes `WorkerProfile` via a `RegistrationRequest`. +4. `ray.init()` local mode — no head pod. +5. A `ServiceBounty` is claimed, executed against **MLX-GPU (finetune/serve) or ANEMLL-ANE (small-model serve)**, proven with the Enclave key, and either pinned (IPFS) or handed off as a `ServiceOffer` (§7). 
**A real ANE-served bounty is demoable on one MacBook today.** + +### 6.5 What changes at N nodes + +| Concern | 1 node | N nodes | +|---|---|---| +| Ray topology | `ray.init()` local | **Head on Linux**; Mac runners are host-side workers (forced: macOS multi-node unsupported) | +| Scheduling | trivial | Ray places by **custom resources = `capabilities[]`**: small-model low-power → ANE nodes, GPU jobs → Max chips | +| Claim contention | none | Controller lease (`status.claimedBy` + finalizer) — single-writer, same discipline as serviceoffer-controller | +| Fine-tune scale-out | single worker | Ray Train `num_workers>1` **on MLX-GPU** — *distributed GPU* (the real path), **not** distributed ANE | +| Serve scale-out | 1 replica | Ray Serve `num_replicas=N`, fronted by **one ServiceOffer** → Traefik load-balances `/services//*` over multiple Endpoints (ClusterIP, per the ExternalName-avoidance rule) | + +**Invariant across the growth curve: Ray scales the *GPU* fabric; the ANE is always a per-node, small-model, low-power inference/eval accelerator — never a cluster-wide training pool.** That is the only design the landscape supports. + +--- + +## 7. Three worked examples (post → claim → run → verify → pay) + +**Proposed CLI surface.** Demand side: `obol bounty post|list|claim|submit|status|cancel`. Fulfiller side: `obol fulfill ` (the runner loop), `obol worker onboard` (probe + register, an alias over `agent new --runtime worker`). Reuse `obol buy inference` (#607) for consuming served bounties and `obol sell mcp` (#609) for verifier-as-a-tool. 
+ +### 7.1 Benchmark (the $500 case) + +```bash +# POST — poster escrows $500 as pre-signed vouchers into a PurchaseRequest, creates the ServiceBounty +obol bounty post bench-deepseek-v4-flash --type benchmark --runner bench \ + --model deepseek-v4-flash --hardware-class M4-Max-40c-128g \ + --reward 500 --asset USDT --chain base --bond 750 \ + --verifier consensus --harness lm-eval-harness@v0.4.3 \ + --criteria "mmlu>=0,gsm8k>=0,humaneval>=0" +# → escrow PurchaseRequest (PreSignedAuths[]) + ServiceBounty CR, phase=Open + +# CLAIM — a fulfiller's runner sees the board, stakes the bond, leases the bounty +obol bounty list --requires benchmark +obol fulfill bench-deepseek-v4-flash # sets status.claimedBy, stakes bond, phase=Claimed + +# RUN — runner commits H, runs the pinned harness, signs the run-manifest +# (BenchmarkRunner; engine reported honestly: GPU or ANE) +# SUBMIT +obol bounty submit bench-deepseek-v4-flash \ + --artifact results.json --artifact run.manifest # phase=Submitted (committed first) + +# VERIFY — N-of-M VRF-sampled same-class verifiers re-run on the committed dataset, +# agree within ε on the rounded eval-score hash → controller sets Verified +# PAY — reconcilePayout releases one escrow voucher to the fulfiller's wallet → phase=Paid +obol bounty status bench-deepseek-v4-flash # Verified=True, Paid=True, PayoutTxHash=0x... 
+``` + +### 7.2 Fine-tune (staged, pay-per-epoch) + +```bash +obol bounty post ft-qwen-coder --type fine-tune --runner mlx-lora \ + --model qwen3.5:9b --dataset ipfs://bafy.../sql.jsonl --epochs 3 \ + --reward-per-epoch 40 --reward 120 --asset USDC --chain base-sepolia --bond 200 \ + --verifier harness-rerun --criteria "sql_exec_acc>=0.78,eval_loss<=0.85" \ + --no-first-valid-wins # poster reviews before final release + +obol fulfill ft-qwen-coder +# runner trains on MLX GPU (NOT ANE), streams {step,loss,tok_s} via SSE through HandleProxy +# after each epoch's checkpoint: verifier does held-out re-eval (inference-only, cheap) +# → controller releases that epoch's $40 voucher; 3 epochs = $120 + $120 final = $240 +obol bounty status ft-qwen-coder # shows EscrowRemaining shrinking per accepted epoch +``` + +### 7.3 Serve (ComfyUI, card-paid, becomes a sellable endpoint) + +```bash +obol bounty post host-comfyui-sdxl --type serve --runner comfyui \ + --model sdxl-comfyui --reward-per-hour 0.50 --pay-with card \ + --verifier tee-attestation --criteria "uptime_pct>=99.5,p95_latency_ms<=4000" \ + --max-fulfillers 3 # 3 redundant hosts + +obol fulfill host-comfyui-sdxl +# runner stands up Ray Serve → ComfyUI, then runs the HANDOFF that closes the loop: +obol sell inference bounty-svc-host-comfyui --model sdxl-comfyui \ + --pay-to 0xWORKER... --per-mtok 0.05 --chain base +# → ServiceOffer → controller: ModelReady→...→Ready (controller.go:528-532) +# → Traefik routes /services/bounty-svc-host-comfyui/* via x402 to the host listener + +# VERIFY — continuous, automatic: PodMonitor→Prometheus checks uptime/p95; canary probes; +# real paid traffic doubles as liveness. SLA met → epoch payout via Stripe capture (#608). 
+# CONSUME — the bounty produced a DURABLE revenue endpoint, not a one-shot: +obol buy inference http://obol.stack:8080/services/bounty-svc-host-comfyui # #607 UX +``` + +The serve example is the marketplace's keystone: **a fulfilled serve bounty becomes a `ServiceOffer`**, so the bounty doesn't just pay once — it spins up standing supply that anyone can then buy. Demand creates supply. That is the two-sided market closing on itself. + +--- + +## 8. Modularity & growth + +- **New task types** drop in as a single `BountyRunner` (§6.3) advertising new `capabilities[]`. The CRD, controller, x402 rail, and ERC-8004 path never change — they operate on the opaque `spec.task.runner` + a generic `Proof`. RL, eval, embeddings, render: one file each. +- **New payment methods** drop in as a settlement adapter (`voucher|escrow|sale|cardAuth`), exactly as MPP #608 made `Method: crypto|card` pluggable. The controller never signs in any of them. +- **Composition with `obol sell mcp` (#609):** a verifier exposes "verify-this-bounty" as a **paid MCP tool over x402** (`internal/x402mcp/server.go`) — verification becomes a permissionless, per-job-compensated market, and submission/verification ride the same in-band `_meta` x402 rail. +- **Composition with card payments (#608):** rewards payable in USDC/OBOL/card via the same pluggable `cardSettleFunc` (`internal/x402/card.go`); card escrow = Stripe manual-capture. +- **Composition with buy-inference (#607):** fulfillers *discover* demand via the same `/skill.md` + `/api/services.json` feeds and `internal/buy/discover.go`; posters *consume* served-bounty endpoints with the new positional-URL `obol buy inference` UX (`internal/buy/{balance,discover,purchases}.go`). + +The marketplace is therefore **closed under composition**: a ServiceBounty can be fulfilled by an Agent (`obol sell agent`), served as a ServiceOffer, consumed via buy-inference, verified via a paid MCP tool, and paid in fiat — all on machinery that already ships. 
+ +--- + +## 9. Phased roadmap + +**Smallest shippable slice — v0 (target: the deterministic-eval happy path on one Mac):** + +1. **`ServiceBounty` CRD + GVR registration** — `internal/monetizeapi/types.go` (add `ServiceBountyKind`/`ServiceBountyResource`/`ServiceBountyGVR` near :48-67; `ServiceBountySpec`/`ServiceBountyStatus`/`ServiceBountyClaim`; clone `DrainEndsAt`/`DrainExpired` as `EffectiveDeadline`/`ClaimExpired` from :498-519). Ship the CRD manifest in `internal/embed/infrastructure/base/templates/` beside `serviceoffer-crd.yaml`; extend `internal/embed/embed_crd_test.go`. +2. **`runtime: worker` enum** — one-line additive change at `types.go:686-690`. +3. **`reconcileServiceBounty` sibling pass** — `internal/serviceoffercontroller/` (new `bounty_controller.go` + `bounty_render.go`), wired into `cmd/serviceoffer-controller/main.go` as a third queue, cloned from `reconcileOffer` (controller.go:386-559). Includes the confused-deputy escrow guard (copy agent_resolver.go:46). +4. **Escrow via Option 2 (voucher)** — reuse `PurchaseRequest.PreSignedAuths[]` (types.go:565); release through the existing buyer-sidecar settlement path. **Trusted coordinator + poster-as-judge** for acceptance; single re-run for deterministic types. No consensus yet. +5. **CLI** — `obol bounty post|list|claim|submit|status` in `cmd/obol/` (new `bounty.go`); `obol worker onboard` as an alias over `agent new --runtime worker`. +6. **Single-Mac runner** — `internal/worker/` (new): `runner.go` (the `BountyRunner` interface + loop) with `BenchmarkRunner`, `MLXServeRunner`, `ANEMLLServeRunner`, `MLXLoRARunner`. `ray.init()` local mode. Reuse `enclave_darwin.go` for proof signing, `inference/gateway.go` for serve handoff. +7. 
**RBAC** — add `servicebounties` + `servicebounties/status` to the agent role in `internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml` — as a **namespaced Role/RoleBinding** (NOT the existing cluster-wide `openclaw-monetize-write` ClusterRole; see corrections #2), beside `serviceoffers`/`purchaserequests`. + +**Flagship v0 bounty types:** deterministic **eval-score benchmark** (near-trust-minimized for free) and **serving SLA** (automatic via existing PodMonitor/Prometheus). v0 is *honest about trust*: you trust the coordinator and the poster. + +**v1 / v2 verification roadmap — superseded by §11.** The paragraphs below predate the no-staking steer; the canonical eval roadmap is §11.7. Kept for the non-verification items only (hardware-class binding, `BountyEscrow.sol`, MCP composition). + +**v1 — verifier consensus + optimistic challenge market** *(superseded where stake-weighted)*: ~~VRF-sampled, stake-weighted N-of-M consensus~~ → median-of-k OBOL-paid evaluator quorum (§11); hardware-class binding + reference normalization for tok/s; probabilistic full-audit for fine-tunes. Coordinator becomes a dumb router. + +**v2 — trust-minimized** *(superseded where stake-weighted)*: on-chain `BountyEscrow.sol` removes fund custody; enclave-bound evaluator identities for real sybil cost; ~~a stake-weighted juror committee~~ → disagreement-triggered escalation panels (§11.7); ERC-8004 reputation sets the verification tax (high-rep → short optimistic window; low-rep → mandatory pessimistic re-run); verifier-as-a-paid-MCP-tool (#609) makes the evaluator market permissionless. Flip `OBOL_EXPERIMENTAL_ANE_TRAIN=1` *only if* Orion ever leaves PoC. + +**Build it Monday:** items 1–7 are the v0 cut. The only genuinely new code is the `ServiceBounty` CRD, the `reconcileServiceBounty` pass, the `cmd/obol/bounty.go` CLI, and the `internal/worker/` runner. 
Everything else — escrow vouchers, x402 settlement, ERC-8004 identity, the Enclave signer, the serve handoff, PodMonitor SLA — already ships. + +--- + +## 10. Honest risks & open questions + +1. **Voucher-MVP is escrow theater.** Option 2 gives the coordinator control over a *release decision* without funds custody, and the poster's balance isn't actually reserved (the voucher bounces if the poster spends elsewhere). This is deliberate, fenced by a value cap + bond + reputation gating, and retired the moment `BountyEscrow.sol` ships. **Don't market it as custody.** +2. **No cryptographic oracle for non-deterministic deliverables.** Open-ended quality, tok/s/W, absolute cross-hardware throughput, and "didn't train on the test set" rest *permanently* on reputation + stake + audit. The product must label each bounty's class so posters know what they're buying. +3. **TEE attests the signer, not the computation.** There is no macOS TEE for an LLM forward pass. Overselling "TEE-verified inference" would be a lie; the enclave is a sybil-resistance multiplier only. +4. **Same-class verifier liquidity.** Verifying an M4-Max tok/s claim needs M4-Max verifiers in the pool. Bootstrapping that pool per hardware class is a real operational cost; until it exists, tok/s bounties fall back to reputation. +5. **ANE training is vapor today.** The `finetune.ane` path is gated off for a reason (Orion: GPT-2-124M, 5–9% peak). If we ever ship it on, we must re-validate the landscape — building product on it now would be dishonest. +6. **Ray-head-on-Linux is an extra moving part.** macOS multi-node being unsupported forces a Linux head; this complicates the N-node story and the demo. Single-node `ray.init()` is the safe default; clustering is a v1+ concern. +7. 
**Cross-namespace escrow is a confused-deputy footgun.** The `escrowRef.Namespace == bounty.Namespace` guard (mirroring agent_resolver.go:46) is **load-bearing** — without it a poster in ns A could drain a `PurchaseRequest`'s pre-signed auths in ns B. This must ship with the CRD, not after. +8. **Open question: does the coordinator hold the verifier release key in MVP?** If yes, it's a single point of compromise for all open bounties (mitigate: per-bounty keys / threshold / move to on-chain EIP-712 release ASAP). If no, who submits the voucher after verification? Resolve before v0 ships. +9. **Open question: dispute resolution latency.** The challenge window trades payout speed against safety. What window length per type, and who funds the watcher incentive at low volume before challenger rewards self-sustain? *(Partially superseded: §11.7's escalation panel is the new dispute path; window economics still open.)* + +--- + +## 11. Evaluator market — verification by default (canonical, 2026-06-10) + +> **This section is the canonical verification design.** It supersedes §5.3's stake-weighted machinery and the §9 v1/v2 stake/slashing roadmap. Design steer after the v1 scaffold shipped: **no validator staking, no slashing — we are not rebuilding EigenLayer.** Verification is a separate OBOL-paid evaluator market anchored on ERC-8004 reputation. Full research citations: `plans/evaluator-market-research-notes.md`. 
+ +### 11.1 Trust model and money legs + +The poster funds **two legs** at post time; the controller tallies but never signs: + +| Leg | Token | Signed by | When | On pass | On fail | +|---|---|---|---|---|---| +| Reward | USDC | Poster at post (`upto`, recipient bound at claim via `witness.to`) | Escrowed at post | Captured → fulfiller | Voided → refund poster | +| Eval budget | OBOL | Poster's **agent** at selection time (Permit2, `witness.to` = each evaluator) | Reserved at post, signed when evaluator set is known | Batch-settled to k evaluators (one tx) | Batch-settled to k evaluators | +| Self-bond | OBOL | Fulfiller at claim (`ServiceBountySelfBond`) | Held with claim | Returned | Forfeited → offsets poster's eval spend (anti-griefing) | + +The eval leg **cannot be pre-signed at post** — `witness.to` needs evaluator addresses that don't exist until selection. The poster's agent signs at selection (buy.py-process-loop style); bounded to exactly k × the per-eval price approved at post. Evaluators submit ERC-8004 `validationResponse` (0–100) with **their own agent wallets**; the controller reads and tallies. Per-eval price, k, and tolerance bands come from the task package (`task.yaml`), not per-bounty negotiation. + +### 11.2 Defaults and the dangerous flag + +Verification is **on by default**. `obol bounty post` shows a cost preview (reward + k × evalPrice) and confirms in a TTY. Opt-out is explicit and never silent: + +- `--dangerously-skip-verification` (house precedent: `dangerouslyDisableDeviceAuth`) → `spec.eval.mode: dangerouslySkipped`, printer column `VERIFIED: no`, `Verified` condition keeps `reason=PosterOverride` — the shipped v1 scaffold's poster-as-judge path **is** the skipped path, correctly labeled, nothing retrofits. +- Skipped bounties write no ERC-8004 validation entries and their reputation feedback is suppressed/discounted — an unverified bounty cannot be farmed for reputation. +- Non-TTY: no prompt, but skipping still requires the flag. 
+- `--evaluators N` raises k above the package default; `--no-newcomer-seat` buys an all-veteran quorum at full price (§11.4). + +### 11.3 Lifecycle with the EVALUATING phase + +``` + post ─► Open ─claim─► Claimed ─submit─► Submitted + │ + ┌─────────────┴──────────────┐ + │ EVALUATING │ + enrolled evaluator pool ──►│ 1. SELECT k evaluators, │ + (ERC-8004 id + enclave │ reputation-weighted │ + attestation, per task │ 2. COMMIT hash(score ‖ │◄─ each re-runs the + type) │ salt ‖ evaluatorAddr)│ private dataset + │ 3. REVEAL scores + salt │ fraction locally + │ 4. QUORUM median within │ + │ tolerance band? │ + └──────┬──────────────┬──────┘ + pass │ │ fail + ▼ ▼ + Verified=True Rejected + reason= (reward voids → refund, + EvaluatorQuorum self-bond forfeits) + │ + ▼ + Paid: reward → fulfiller (capture) + eval budget → k evaluators (batch-settlement) +``` + +Evaluators claim slots and post verdicts through the same annotation write-channel as fulfillers (`obol.org/eval-claim|eval-commit|eval-verdict`), validated and promoted by the controller. The eval an evaluator runs is **the same embedded task package** — they re-run and compare, they don't grade freestyle. + +### 11.4 The ladder: Shadow → Probation → Full (cold-start without ossification) + +Quorum = **median of k** is what makes this safe: a median is robust to one outlier by construction, so one newcomer seat cannot flip a verdict even if malicious. 
+ +``` + ┌─────────────────────────────────────────────┐ + │ SEAT COMPOSITION OF A k=3 QUORUM │ + TIER 2 · FULL ──────►│ Seat 1 high-rep full price counts │ + rep-weighted lottery │ Seat 2 high-rep full price counts │ + TIER 1 · PROBATION ─►│ Seat 3 newcomer ~50% price counts │ + reserved seat, │ (median absorbs one outlier; │ + value-capped bounties│ discount passed to poster) │ + TIER 0 · SHADOW ────►│ +1..2 shadow free scored │ + random assignment, │ commit-reveal alongside, verdict │ + can't pick bounties │ graded against quorum median │ + └─────────────────────────────────────────────┘ + PROMOTION Shadow ──(N agreements within tolerance)──► Probation + Probation ──(M paid evals, no divergence)──► Full + DEMOTION divergence → rep hit → weight drops; inactivity → decay +``` + +- **Tier 0 Shadow (free)**: enroll = ERC-8004 identity + Secure Enclave device attestation, per task type. Randomly *assigned* to live bounties (can't park sybils where you want them); commits and reveals in the same window; verdict counts for nothing, pays nothing; graded against the quorum median → ERC-8004 feedback anchored to the settled bounty. Farming cost = real GPU time per attested device. +- **Tier 1 Probation**: one reserved seat of k, counts fully (median protects the verdict), ~50% pay with the **discount passed to the poster** — posters gain from hosting newcomer seats. Only on bounties below a value cap. Requires k≥3 whenever seated. +- **Tier 2 Full**: reputation-weighted lottery, full price, all values. v1 selection is controller-side weighted sampling (honest about local-first centralization); the selection function is the swap seam for VRF when cross-party. +- Promotion thresholds live in the task package: `eval.ladder: {shadowAgreements, probationEvals, probationValueCap}`. +- Anti-collusion: random shadow assignment, commit-reveal, **pair-diversity** (down-weight repeat evaluator↔fulfiller pairs), device-binding, rep decay. Reputation is **per task type**. 
+ +### 11.5 What adjacent protocols taught us (deep-research 2026-06-10, all claims 3-vote verified) + +**The no-stake bet is vindicated.** Bittensor's stake-weighted Yuma Consensus is governed by capital, not quality: top 1% of wallets held a median ~90% of stake across 64 subnets; >half of subnets 51%-attackable by <1% of wallets; rewards correlate with stake at r≈0.80–0.95 vs r≈0.50 with consensus quality. The cold-start corollary transfers: low-participant markets are trivially capturable — benchmark the ladder against small-coalition takeover in the early phase. + +**Three confirmed weaknesses:** +1. **Median-proximity free-riding** (Bittensor weight-copying, production-exploited: copiers out-earned honest validators). Commit-reveal only stops *same-round* copying — Bittensor's own docs concede that for static ground truth "nothing can prevent weight copying." For repeated bounty types, copying last round's revealed median works. Fix = **make the answer move** (rotate the private fraction), not longer concealment. +2. **p+epsilon bribery** (executed on Kleros mainnet, 2018 Doges on Trial: the bribe won rounds 1–2 of disputeID 75 and was reversed only by an appeal to a fresh 14-juror panel). Attacker pledges P+ε conditional on the dishonest outcome *losing* → everyone complies → bribe never paid → zero realized cost. The two defenses that work — slashable deposits and escalating appeals (O(N²) attacker lockup) — are both absent from our v1. Our bribery floor = per-task reward + discounted reputation-stream value; commit-reveal is *load-bearing* in a no-appeal design. +3. **Attestation-only sybil resistance has no production precedent.** Kleros is explicit that stake IS the sybil defense for random sortition. Device attestation + rep decay carry that burden alone; the free Shadow tier is the attack surface — cost-per-attested-device must exceed the value of walking a sybil to a Full seat. 
+ +**Plus**: base-rate guessing beats coherence reputation (Kleros: ~70% Reject skew → zero-effort base-rate voting looks ~88% coherent). If most bounties pass, rubber-stamp "pass" votes climb the ladder. + +### 11.6 Mechanisms stolen verbatim + +| Steal | From | Fixes | +|---|---|---| +| `hash(score ‖ salt ‖ evaluatorAddress)` commitments | Kleros §4.3 | Commitment copy/replay between evaluators | +| Non-reveal penalty ≥ outlier penalty | Kleros incentive system | Silent abstention as the cheap exit | +| Automated reveals (Drand time-lock) or non-reveal = worst case | Bittensor CR4 | Selective revelation | +| EV-balance tuning (no-effort evaluation must be EV-negative) | Kleros parameterization | Lazy rubber-stamping; our lever is rep decay, not voteStake | +| Difficulty-weighted rep (reward correct-minority, not easy unanimity) | derived from Kleros base-rate data | Base-rate climbing | +| Known-fail canaries in the private fraction | derived | Makes rubber-stampers detectably wrong | +| Disagreement-triggered escalation to a larger fresh panel | Kleros appeals | The only defense that beat p+epsilon in production | + +### 11.7 Amendments (folded into the build plan) + +**v1 (ship in the ladder slice):** +1. Commitment format = `hash(score ‖ salt ‖ evaluatorAddress)`. +2. Fixed reveal window; non-reveal = worst-case outlier (rep penalty ≥ divergence penalty). `task.yaml` ladder block gains `revealWindow` + `nonRevealPenalty`. +3. Seed `datasetCommit.privateFraction` with known-fail canaries; **rotate the private fraction per round** for repeatable bounty types. +4. Reputation gains weighted by disagreement/difficulty — unanimous easy agreement earns ~0; correct minority positions earn most. + +**v2 (design before cross-party):** +5. **Disagreement-triggered escalation**: revealed scores straddling the tolerance band → re-run with a larger fresh panel (2k+1); poster pre-approves an escalation budget cap at post. 
Weaker than Kleros's (no loser-deposit redistribution funds it) — cost falls on the eval budget. +6. **Quantify the bribery floor in OBOL**: the discounted value of a Full seat's future income stream is our analog of Kleros's O(N²) lockup. If corrupting a quorum majority (⌊k/2⌋+1 evaluator seats — the number needed to move the median) costs less than plausible bounty values, raise k or tighten value caps. +7. Drand-style time-lock reveals when cross-party. + +**Open questions carried forward:** OBOL value of a Full-tier reputation stream (unquantified); empirical adequacy of device attestation as a sybil bound (no production precedent anywhere); which task types have static-enough ground truth that commit-reveal is structurally insufficient → rotation cadence; how Truebit/Gensyn/Numerai/Chainlink handle non-deterministic verification (didn't survive this research round — re-research before a verifiable-compute task type ships). + +--- + +*Relevant code anchors reused throughout: `internal/monetizeapi/types.go` (Type enum :105, Model :166-174, Payment/PriceTable :211-247/:299-305, card :216, registration/supportedTrust :308-333, drain time-math :498-519, PurchaseRequest/PreSignedAuths :536/:565/:638, Agent runtime/status :686-690/:715-727/:731); `internal/serviceoffercontroller/controller.go` (reconcile loop :386-559, Ready rollup :528-532, gate/route :660/:695, registration sibling :802); `internal/serviceoffercontroller/agent_resolver.go:33,:46` (polymorphic upstream + confused-deputy guard); `internal/x402/verifier.go` (SSE flush seam); `internal/x402/buyer/` (bounded settle-after-success); `internal/inference/gateway.go` (host gateway/NoPaymentGate); `internal/enclave/enclave_darwin.go:330` (real Secure Enclave Sign); `internal/openclaw/wallet.go` (payout wallet); `internal/erc8004/types.go:15-47` (OnChainReg.AgentID, SupportedTrust[]); `internal/embed/infrastructure/base/templates/{x402.yaml,llm.yaml}` (PodMonitor→Prometheus), `obol-agent-monetize-rbac.yaml` (agent RBAC); `internal/x402/card.go` (#608), 
`internal/x402mcp/server.go` (#609), `internal/buy/` (#607).* + + +--- + +## Appendix A — Verified ANE landscape (live research, 2026-06-09) + +**Feasibility verdict.** INFERENCE on ANE: REAL but niche. Running small LLMs (<=8B) on the ANE works today via Apple Core ML (ANEMLL is the leading open pipeline, Beta 0.3.5). It is power-efficient but 2-5x SLOWER than the same Mac's GPU. No mainstream runtime (MLX, llama.cpp, vLLM, Ollama) dispatches LLM matmul to the ANE; they all run on the Metal GPU and leave the ANE idle. TRAINING on ANE: REAL only as research PoC. Two reverse-engineered projects (maderix/ANE and its successor mechramc/Orion) genuinely run forward+backward passes on the ANE via private _ANEClient/_ANECompiler APIs, but the authors themselves say it does NOT replace GPU training and runs at ~5-9% of peak. DISTRIBUTED ANE / Mac fleets: No one clusters ANEs. Real Mac clusters (exo) shard models across the GPU/MLX, not the ANE. Ray multi-node on macOS is officially UNSUPPORTED (Linux-only; macOS multi-node is 'untested', needs an at-your-own-risk env flag). A 'distributed ANE access platform' for training is NOT buildable today; a distributed GPU-based Mac inference cluster IS. + + +**Detailed findings.** Skeptical verdict after cross-verifying every pasted claim against primary sources (GitHub repos/issues, an arXiv preprint, Apple research, and independent benchmarks). + +INFERENCE on the ANE is real but a niche, low-power play: ANEMLL (Beta 0.3.5) genuinely runs <=8B LLMs (Llama/Qwen/Gemma/DeepSeek-distill) through Core ML on the ANE at ~512-4K context, but it is 2-5x SLOWER than the same Mac's GPU (e.g. ~47-62 tok/s @2W on Llama-3.2-1B vs ~204 tok/s @20W on GPU). 
Crucially, NO mainstream runtime uses the ANE for LLMs: MLX (issue #18 open), llama.cpp (issue #10453 is an OPEN proposal, nothing merged; discussion #336 is exploratory), vLLM (vllm-metal/vllm-mlx are real but GPU-via-MLX), Ollama and LM Studio all run on the Metal GPU and leave the ANE idle. Unsloth has no ANE support ('in the works'); 'Unsloth-MLX' was renamed mlx-tune and trains on the GPU. + +The Anemll 'Flash-MoE' / anemll-flash-llama.cpp fork is real and IS a llama.cpp fork, but it streams MoE experts from SSD to the Metal GPU — not the ANE. + +TRAINING on the ANE is real ONLY as reverse-engineering research. maderix/ANE genuinely does forward+backward, Adam, dynamic weight patching, and zero-copy GPU<->ANE via private _ANEClient/_ANECompiler + MIL — but the author labels it a PoC at ~5-9% of peak that 'does NOT replace GPU training.' Its successor mechramc/Orion (backed by arXiv 2603.06728, Mar 2026) extends this with LoRA hot-swap and a compiler, but still on tiny GPT-2-124M/Stories-110M models. These prove the inference-only restriction is a software policy, not silicon — but ANE training is nowhere near production. + +DISTRIBUTED: no one clusters ANEs. Real Mac fleets (exo, ~38k stars, RDMA-over-Thunderbolt 5) shard across the GPU/MLX, not the ANE. Ray multi-node is officially Linux-only; macOS multi-node is 'untested' behind RAY_ENABLE_WINDOWS_OR_OSX_CLUSTER=1. + +For a 'distributed ANE access platform': building it on ANE *training* is not feasible today. The realistic build is a distributed Mac *GPU* inference platform (exo or Ray-head-on-Linux + Mac workers, MLX/vllm-metal/llama.cpp per node), with optional per-node ANEMLL for low-power small-model inference. Numbers to distrust: the '16x speedup' has no source (fabricated), and Apple's '38 TOPS INT8' is a 2x-convention over a measured ~19 TFLOPS FP16 peak with no real INT8 compute speedup for LLM matmul. 
+ + +--- + +## Appendix B — Adversarial red-team + +### Biggest risk + +The whole design rests on a load-bearing falsehood: that bounty payout "reuses the existing buyer-sidecar settlement path against the escrow PurchaseRequest" (sec 4.5 step 5, sec 9 item 4, sec 7.1 PAY). It does not, and cannot, without net-new payment code, which collapses the doc's central "build it Monday, only 4 new files" thesis. Verified against internal/x402/buyer/proxy.go and signer.go: the buyer sidecar is an http.RoundTripper (proxy.go:580-649) that consumes exactly one pre-signed ERC-3009 voucher when, and only when, a LIVE x402-gated HTTP upstream returns <400 to a per-request micropayment (proxy.go:241, 605; ConfirmSpend at signer.go:295-297 "persists a nonce as consumed after a successful paid upstream response"). There is no primitive for "hold N vouchers, then release one to a fulfiller address on a verifier verdict." The buyer-side money flow is buyer->seller-at-request-time; a bounty needs escrow->fulfiller-on-acceptance, the inverse direction the sidecar has no code path for. Worse, EffectiveBuyerNamespace() hard-returns "llm" (types.go:649-651), so every PurchaseRequest's auths are written into the single shared llm-namespace buyer pool; there is no per-poster, per-fulfiller payout isolation primitive at all. The doc's own sec 10.1 admits the voucher is "escrow theater" (no on-chain condition, poster balance not reserved, refund = poster racing to cancelAuthorization), but then still routes the actual release through machinery that physically performs the opposite operation. Net: the "v0 ships this week on shipped code" claim is the single biggest reason this fails; the only honest v0 is "trusted coordinator manually triggers an off-band transfer," exactly the centralized-custodian design the doc claims to avoid. + + +### Sharpest 5 fixes + +1. DELETE the 'reuse buyer-sidecar settlement' claim everywhere (sec 4.5 step5, sec 7, sec 9). 
The buyer sidecar is a request-time micropayment RoundTripper (proxy.go:580-649, ConfirmSpend signer.go:295) that consumes a voucher on a live upstream 2xx; it has no release-on-verdict path and EffectiveBuyerNamespace() pins everything to 'llm' (types.go:649). Honest v0 = a coordinator agent that, on Verified, submits a single poster-pre-signed ERC-3009 voucher (payTo=fulfiller) by calling the facilitator /settle directly. State that this coordinator IS a trusted release authority and that sec 10.8's open question is a v0 blocker, not a v2 nicety. +2. FIX the RBAC claim, which is factually wrong and security-relevant. The doc says agent bounty RBAC is 'namespace-scoped' (sec 9 item7, sec 2). Verified: serviceoffers and purchaserequests are granted via ClusterRole 'openclaw-monetize-write' + ClusterRoleBinding to BOTH Hermes and OpenClaw SAs (obol-agent-monetize-rbac.yaml); cluster-wide create/update/delete. Adding 'bounties' there gives every agent cluster-wide write on all bounties/escrow refs in every namespace. Either move bounties to a namespaced Role/RoleBinding or state plainly the posture is cluster-wide and design the confused-deputy guard accordingly. +3. KILL the cross-namespace escrowRef before it ships. sec 10.7 calls the escrowRef.Namespace==bounty.Namespace guard 'load-bearing' but the field is {name,namespace} with namespace settable (sec 3.2). Given cluster-wide PurchaseRequest write, an attacker posts a ServiceBounty in ns A whose escrowRef points at a victim PurchaseRequest in ns B and drains its pre-signed auths. Mitigation: REMOVE the namespace field from escrowRef entirely (force same-namespace by construction) rather than relying on a runtime string compare a future refactor can drop. +4. CUT the entire ANE/Ray/worker substrate (sec 6) from v0. It is the largest, least-buildable surface (host-side Ray worker join with macOS multi-node officially unsupported, WorkerProfile probes, BountyRunner plugin registry, ane-train gating). 
v0 needs none of it: a bounty is fulfilled by any process that can produce a signed deliverable. Ship ServiceBounty CRD + reconcile + CLI + a single deterministic verifier (eval-score re-run OR PodMonitor SLA) and let fulfillment be opaque. Re-introduce the substrate only after money/verification rails are proven. +5. ADD a hard admission invariant that a ServiceBounty NEVER produces an HTTPRoute, Middleware, or any tunnel-exposed route, and that the servicebounty-controller has zero route/Secret/Namespace creation capability. Make it a test (extend embed_crd_test.go) so a ServiceBounty can never become unintended public ingress, and ensure registration.enabled discovery rides only the existing /skill.md + agent-registration.json surfaces, never a new public path. + + +### Economic / trust attacks + +- **[HIGH] Escrow griefing: poster never accepts (poster-as-oracle for non-deterministic deliverables). Fulfiller burns real compute on a fine-tune, submits a valid checkpoint, poster stalls (no deadline pressure on the poster) or rejects in bad faith. With voucher-MVP the poster's funds were never reserved (sec 10.1), so the poster's downside is zero while the fulfiller ate the compute. firstValidWins=false (sec 3.3b) makes this the DEFAULT for fine-tunes.** + - _Mitigation:_ Symmetric bonds: poster must also bond. On a deterministic-verifier pass the controller auto-releases with NO poster discretion. Reserve poster-manual strictly for explicitly-labeled subjective bounties, and on poster non-response past a review deadline auto-release to the fulfiller. Require real on-chain lock (BountyEscrow.sol) above a low value cap so the poster has skin in the game. +- **[HIGH] Reward front-running / claim-then-copy. firstValidWins=true + maxFulfillers + readable submissions: a watcher sees fulfiller A's revealed deliverable (or the payout tx in the mempool) and submits a copy to win the race. 
Commit-reveal (sec 5.0) is described but submissions are still readable and payout is an observable tx the coordinator submits.** + - _Mitigation:_ Enforce commit-reveal as a HARD protocol gate: H=hash(deliverable||salt) committed before any reveal, reward binds to the address that committed first so a copied reveal pays the original committer. Encrypt the deliverable to the poster or use threshold reveal so a watcher cannot lift it. +- **[HIGH] Sybil fulfillers + verifier collusion on tok/s and 'didn't train on test' claims. New agents carry near-zero ERC-8004 reputation but a sybil farm spins up many Agent CRs cheaply (agent new --create-wallet is free), self-claims, self-verifies in a consensus pool, and splits rewards. VRF-sampled stake-weighted selection assumes a deep honest same-hardware-class verifier pool that does not exist at launch (sec 10.4).** + - _Mitigation:_ At low pool depth fall back to a single trusted coordinator re-run (deterministic types only) and REFUSE non-deterministic bounties until verifier liquidity exists. Gate consensus weight on enclave-bound identity (one Secure Enclave key = one device) so each sybil costs real hardware. Make bond >= reward x (1/P(detected)) a validated admission constraint, not prose. Never pay tok/s/W or contamination claims on consensus; reputation+audit only. +- **[MED] Free-riding via report-vs-ship mismatch / fabricated benchmarks. sec 3.3a example criteria are all 'mmlu>=0, gsm8k>=0, humaneval>=0' (report-only) with verifier:consensus, so the acceptance gate accepts ANY score; only the optional, liquidity-dependent consensus re-run catches it.** + - _Mitigation:_ Reject threshold==0 / report-only acceptance criteria at admission for any reward-bearing bounty (a gate that always passes is not a gate). Require resultHashRequired + a mandatory deterministic re-run (not optional consensus) for eval-score bounties. 
Commit the eval dataset at creation with rows revealed post-commit and make contamination-resistant gold subsets non-optional. +- **[MED] Voucher replay / double-spend across bounties. A pre-signed ERC-3009 voucher is a bearer instrument valid for its whole validBefore window (types.go PreSignedAuth.ValidBefore). The same poster voucher, or one leaked from the shared 'llm' buyer pool (EffectiveBuyerNamespace=llm, types.go:649), could be submitted against multiple bounties or replayed before the poster cancels the nonce.** + - _Mitigation:_ One nonce per (bounty, epoch); controller tracks consumed nonces in status and refuses re-submission. Do not co-mingle bounty reward vouchers in the shared llm buyer ConfigMap pool; give bounty escrow isolated holding. Move to on-chain escrow where the contract enforces single-release per id. +- **[MED] Privilege escalation via serve-bounty handoff. The serve flow has a fulfiller run 'obol sell inference/http' to publish a /services/* route (sec 7.3). Combined with cluster-wide serviceoffers write (ClusterRole openclaw-monetize-write) and agent-factory namespace+secret create rights (hermes-agent-factory-write), a malicious bounty could induce an agent to publish routes or create namespaces/secrets it shouldn't, straining the 'only x402-gated + discovery endpoints public' tunnel invariant.** + - _Mitigation:_ servicebounty-controller must have ZERO route/Middleware/Secret/Namespace creation capability (assert via test). Route publication stays an explicit, separately-authorized 'obol sell' action by the node operator, never auto-driven by bounty reconciliation. Keep the existing hostnames restriction and confirm bounty discovery rides only /skill.md + agent-registration.json. +- **[MED] Card-escrow auth expiry + chargeback griefing (MPP #608 path). sec 4.4 uses a Stripe manual-capture PaymentIntent as the card 'lock'. 
Stripe auth holds expire (~7 days); a long-deadline bounty (sec 3.3c deadline weeks out) outlives the auth, so the lock silently evaporates, and the poster can chargeback after capture, clawing back paid work with no on-chain finality.** + - _Mitigation:_ Reject card-method bounties whose deadline exceeds the provider's max auth-hold window at admission. Treat card rewards as non-final (chargeback-exposed): require fulfiller reputation gating and/or a poster bond for card bounties, and never advertise card payout as settlement-final the way on-chain USDC is. + + +### Architecture conflicts with existing obol-stack invariants + +1. RBAC is NOT namespace-scoped, contrary to the doc's repeated claim. serviceoffers + purchaserequests are granted via ClusterRole 'openclaw-monetize-write' bound by ClusterRoleBinding to BOTH Hermes and OpenClaw SAs (obol-agent-monetize-rbac.yaml); cluster-wide create/update/patch/delete. sec 9 item7 and sec 2 assert namespace-scoped; false today. Adding 'bounties' there is a cluster-wide grant on all bounties and their escrow refs. +2. Payout cannot reuse the existing buyer-sidecar settlement path (sec 4.5 step5, sec 7, sec 9 item4). proxy.go is an http.RoundTripper that consumes one voucher only when a LIVE x402 upstream returns <400 (proxy.go:241,605; ConfirmSpend signer.go:295). It has no release-on-verifier-verdict path. The money direction (buyer->seller at request time) is the inverse of bounty payout (escrow->fulfiller on acceptance). Structural mismatch, not a tweak. +3. Shared-namespace escrow co-mingling. EffectiveBuyerNamespace() hard-returns 'llm' (types.go:649-651). PurchaseRequest auths all land in the single llm-namespace buyer ConfigMap pool built for buyer micropayments. Routing multi-poster bounty REWARD vouchers through PurchaseRequest (sec 3.2 escrowRef, sec 4.1 Option2) puts N posters' payout instruments into one shared pool with no per-bounty isolation; a custody and replay hazard the doc does not acknowledge. +4. 
Cross-namespace confused-deputy reintroduced. The doc adds escrowRef:{name,namespace} with a settable namespace (sec 3.2) and relies on a runtime guard copied from agent_resolver.go:46. Unlike the agent case nothing forces it: combined with cluster-wide PurchaseRequest write, a settable namespace is a drain-victim's-auths footgun. Omit the namespace field (force same-ns by construction). +5. Controller-holds-no-keys preserved in spirit but the doc smuggles a de-facto custodian. sec 4.2 has a coordinator agent submit the voucher / hold the verifier release key (sec 10.8 leaves open whether it holds that key), making a single coordinator a release authority over all open bounties. Strains the purely-declarative posture (agent.go reads only litellm-secrets in 'llm'; agent_resolver guards credential brokering). The bounty coordinator is a new trusted signer the architecture has no slot for. +6. verifyOnly permanence vs serve-bounty. x402.yaml:35 verifyOnly:true is permanent and forwardauth.go:24-36 documents the invariant. The serve handoff to obol sell inference is fine (own in-process settle), but the doc must not let a bounty reconcile flip verifyOnly or settle at the Traefik gate, and should say so explicitly; 'reconcilePayout route through internal/x402/card.go' (sec 4.5) brushes against gate settlement. +7. The 'agent' Type / agent-resolver precedent is mis-cited as a model for opaque polymorphism (sec 6.3). The agent resolver synthesizes a CONCRETE upstream (hermes:8642) so the existing route pipeline runs; it is NOT an opaque task-blob dispatcher. A ServiceBounty has no upstream and no route; the precedent supports 'sibling reconcile pass' but not 'controller operates on an opaque task.runner it never interprets.' + + +### Overstated / unbuildable ANE claims to retract from the body + +1. 
sec 5.1 / sec 3.3a present benchmark eval-score under 'verifier: consensus' as near-trust-minimized, but the consensus re-run depends on same-hardware-class verifier liquidity the doc itself admits does not exist (sec 10.4). For tok/s the doc is mostly honest (lower-bound + ranking only), but the sec 3.3a YAML still labels a tok/s-relevant benchmark 'consensus', overstating verifiability at launch. +2. sec 6.4 claims a 'real ANE-served bounty is demoable on one MacBook today' and lists ANEMLLServeRunner as a v0 deliverable (sec 9 item6). Per verified ANE facts this is real ONLY for <=8B models at <=4K context via ANEMLL Beta 0.3.5, at 2-5x SLOWER than the same Mac's GPU. Shipping an ANE runner in v0 sells a niche, slower-than-GPU path as a headline. The honest v0 substrate is MLX-GPU; ANE should be explicitly deferred, not a v0 runner. +3. The serve example (sec 3.3c, sec 5.1) uses 'verifier: tee-attestation' with a tee-quote artifact, implying TEE-verified serving. sec 5.2 correctly retracts this (attests the signer, not the computation; no macOS TEE for an LLM forward pass), but the example YAML and acceptance enum still advertise tee-attestation as a verifier for the computation/SLA. Rename the enum value (e.g. 'enclave-identity') so the CRD surface can't imply TEE-verified inference. +4. sec 6 keeps ane-train (Orion) as a gated-but-named capability and frames flipping OBOL_EXPERIMENTAL_ANE_TRAIN=1 as a near-term modularity payoff. Per facts Orion is research PoC on GPT-2-124M/Stories-110M at 5-9% of peak that does NOT replace GPU training. Naming finetune.ane in the design (even gated) overstates buildability; drop it from CRD/runner vocabulary until it leaves PoC, not parked behind an env flag. +5. Implicit in sec 6: that Ray gives a distributed Apple-silicon fabric. Verified: Ray multi-node on macOS is officially unsupported (Linux-only, RAY_ENABLE_WINDOWS_OR_OSX_CLUSTER=1 at-your-own-risk). 
The doc acknowledges this (sec 6.1, 6.5) but still lists Ray-head-on-Linux + Mac workers as the N-node story with more confidence than the 'untested' upstream status warrants. The honest N-node claim is GPU sharding via exo/MLX, not Ray. + + +### What the MVP should DROP + +1. The entire ANE/Ray/worker execution substrate (sec 6): host-side Ray worker join (macOS multi-node unsupported), WorkerProfile capability probes, the BountyRunner plugin registry, ANEMLLServeRunner, MLXLoRARunner, ane-train gating. v0 fulfillment can be opaque: any process producing a signed deliverable. The single largest, least-buildable cut. +2. Verifier consensus / VRF-sampled stake-weighted selection (sec 5.3); also drop 'consensus' from the v0 acceptance enum so v0 YAMLs cannot claim it. v0 exposes ONLY deterministic single-re-run (eval-score) and automatic PodMonitor SLA. +3. Fine-tune bounties entirely from v0 (sec 3.3b). They combine the weakest verification (held-out re-eval depends on contamination assumptions + reputation), the worst griefing surface (firstValidWins=false poster-discretion default), milestone/per-epoch voucher fan-out, and the MLX trainer runner. Ship benchmark-eval + serve-SLA first. +4. Card payments / MPP #608 escrow (sec 4.4, sec 3.3c). Stripe manual-capture as 'escrow' adds auth-expiry and chargeback failure modes orthogonal to the core crypto rail. Prove the on-chain/voucher path first; add card later as a pure adapter. +5. maxFulfillers>1, redundant/split payouts, and firstValidWins racing (sec 3.2-3.3). v0 should be single-winner, single-claim only; N-fulfiller contention multiplies front-running and double-spend surface before the basic single-winner flow is proven. +6. ERC-8004 reputation-as-verification-tax dial and the paid-MCP-verifier composition (sec 5, sec 8). v0 has no reputation history to throttle on and no verifier market to meter. Defer. 
+ + +--- + +_Generated via a 7-agent design workflow (live ANE research → 4 parallel design perspectives → synthesis → adversarial red-team) on 2026-06-09._ diff --git a/plans/dataset-subscription-v1.1-pitch.md b/plans/dataset-subscription-v1.1-pitch.md new file mode 100644 index 00000000..ded2be59 --- /dev/null +++ b/plans/dataset-subscription-v1.1-pitch.md @@ -0,0 +1,110 @@ +# V1.1 — Continuous Dataset Subscriptions (escrow), a diagram pitch + +**Status:** pitch only — held until the batch-settlement payout leg lands. +**v1 (shipped P1–P6):** a dataset is sold **per version** — one payment buys +exactly the version it was scoped to. That is the right primitive for a +snapshot. It is the *wrong* primitive for a **living** dataset that ships a new +version every week and wants subscribers, not one-off buyers. + +--- + +## 1. The gap v1 leaves + +``` + v1 today: buyer ── pays atomic(v3) ──▶ token{maxVersion=3} ──▶ download v3 + new v4 ships ──▶ buyer must re-probe, re-pay atomic(v4) ❌ friction +``` + +A continuously-sold dataset wants: **pay once, keep receiving** — but without +the buyer pre-funding a platform (no custody) and without the buyer paying for +versions that never ship (no "pay upfront and hope"). + +That is exactly the shape escrow was built for: **authorize ≠ capture.** + +--- + +## 2. 
The mechanism: reserve → capture-per-epoch → void + +``` + SUBSCRIBE (once) + buyer ── signs ONE Permit2 voucher ─────────────────────────────▶ x402-escrow + { recipient: seller, (non-custodial: + max: price_per_epoch × N, holds the SIGNATURE, + deadlines: [d1, d2, … dN] } not the money) + │ + ┌───────────────────────── per new version (epoch) ─────────────────────┤ + │ seller ships v_k ─▶ controller appends signed version k │ + │ escrow CAPTURE(epoch k): transfer price_per_epoch buyer ─▶ seller │ (one recipient, + │ entitlement top-up: token.maxVersion = k │ one settlement, + │ buyer's existing token now downloads v_k — no re-pay │ per epoch) + └───────────────────────────────────────────────────────────────────────┤ + │ + UNSUBSCRIBE / seller stops shipping │ + buyer (or timeout) ── VOID remaining deadlines ─────────────────────────▶ no further capture + → buyer paid for delivered versions ONLY (funds never moved + for undelivered epochs) +``` + +**The invariant that makes it fair:** money moves **only** when a new version +is actually appended to the signed log. No version, no capture. The buyer's +worst case is bounded to `price_per_epoch × (versions actually shipped)`, and +the seller cannot capture ahead of delivery. + +--- + +## 3. What it reuses vs. what is genuinely new + +``` + REUSED (already shipped): + ├─ version log (P2) ........... defines an "epoch" = a new signed Seq + ├─ entitlement map (P2) ....... capture's side effect = token.maxVersion += 1 + ├─ x402-escrow facilitator .... reserve / capture / void, Permit2, non-custodial + └─ ForwardAuth + catalog ...... the priced offer, unchanged + + NEW (the v1.1 cost, stated honestly): + └─ a per-epoch, single-recipient CAPTURE LOOP keyed by subscription id + (Reserve a multi-deadline voucher; Capture once per shipped version to + the one seller; Void the tail on unsubscribe) +``` + +This is **not** `CaptureBatch` reuse. 
`CaptureBatch` splits *one* held auth +across *k recipients* in *one* settlement (e.g. an evaluator panel). A +subscription is the orthogonal shape: *one recipient*, *k settlements over +time*. v1.1 therefore needs new escrow wiring — a small loop, but real code, not +"zero-cost reuse." That honesty is why it is held, not hand-waved into v1. + +--- + +## 4. Why this is the right shape (and the moat) + +``` + prepaid credits (HF/cloud): pay upfront ─▶ platform custody ─▶ hope for value + v1.1 escrow subscription: authorize ─▶ NO custody ─▶ capture on delivery +``` + +- **No platform float, no custody honeypot** — the facilitator holds a + signature, never the money; capture moves funds owner→seller directly. +- **Pay for delivered value** — the seller is paid per shipped version; the + buyer is never charged (remaining deadlines voided) for versions never shipped. +- **Same wallet, same identity, same gate** — a subscription is just a + longer-lived entitlement; the `entitle()` download gate is unchanged. + +It is the dataset analogue of the metered-inference escrow pitch: *price the +outcome (a delivered version), not the prepayment.* + +--- + +## 5. Dependency & ask + +``` + blocked on ──▶ the batch-settlement PAYOUT leg (open; tracked in the + OpenRouter-direction work) — capture needs the same + settle-to-recipient path that leg finalizes. + + ask ───────▶ green-light v1.1 as the FIRST consumer of the payout leg once + it lands: it is a small, well-scoped capture loop on top of the + version log + entitlement map already shipped in P2. +``` + +One artifact, two uses, **and now a recurring revenue shape** — without anyone +fronting a deposit or taking custody. 
diff --git a/plans/evaluator-market-research-notes.md b/plans/evaluator-market-research-notes.md new file mode 100644 index 00000000..db562df5 --- /dev/null +++ b/plans/evaluator-market-research-notes.md @@ -0,0 +1,111 @@ +# Evaluator Market — Adjacent-Protocol Research Notes + +> Deep-research pass (2026-06-10) challenging the ServiceBounty evaluator-market design +> (verification-by-default, median-of-k quorum, Shadow→Probation→Full ladder, no staking/slashing) +> against production decentralized-evaluation systems. 23 sources fetched, 114 claims extracted, +> 25 verified by 3-vote adversarial panels — 25 confirmed, 0 refuted. +> +> **Coverage caveat**: only Bittensor, Kleros, and the p+epsilon literature produced claims that +> survived verification. Truebit, Numerai, Chainlink OCR, Gensyn/Prime Intellect, Ritual/Allora, +> and the EigenLayer baseline did not — "state of the art" below means two production systems +> plus one canonical attack paper. Non-deterministic-verification comparisons (research item 3) +> remain only partially answered. + +## Verdict on the core bet + +**No-stake reputation-weighted selection is vindicated by Bittensor's production record.** +Pre-dTAO on-chain analysis of all 64 subnets (6.66M events): top 1% of wallets held a median +~90% of stake; over half of subnets were 51%-attackable by <1% of wallets colluding; validator +rewards correlated with **stake** at r≈0.80–0.95 vs r≈0.50 for consensus quality. Capital, not +evaluation quality, governs stake-weighted systems. [arXiv:2507.02951 — note: pre-dTAO snapshot, +FLock.io-affiliated authors, wallet-level not entity-level] + +But the cold-start lesson transfers: **low-participant markets are trivially capturable by tiny +coalitions.** The ladder + random assignment + pair-diversity must be benchmarked against +small-coalition takeover during the early low-evaluator-count phase. + +## Three confirmed weaknesses + +### 1. 
Median-proximity scoring is gameable by free-riders (Bittensor weight-copying) + +Bittensor validators copied publicly visible weight matrices, computed the stake-weighted median +to predict Yuma Consensus, and **earned higher APY than honest validators** — because rewards flow +from alignment-with-consensus, not evaluation labor. Our median-of-k + rerun-tolerance rewards +proximity-to-median identically. + +Commit-reveal only fixes **same-round** copying. Bittensor's own docs concede (against interest): +*"If the ground truth about miner rankings is overly static... nothing can prevent weight +copying."* For repeated/static bounty types, copying the prior round's revealed median survives +any concealment window. **The countermeasure is making the answer move, not longer concealment**: +rotate the private-dataset fraction per round; per-round task perturbation. +[docs.learnbittensor.org/concepts/weight-copying-in-bittensor, /commit-reveal; Opentensor weight-copier paper May 2024] + +### 2. p+epsilon bribery — executed in production, and our two missing defenses are the ones that worked + +An attacker who credibly pledges to pay each evaluator P+ε **conditional on the dishonest outcome +losing** makes dishonest voting dominant; if everyone complies the bribe is never paid — attack +succeeds at **zero realized cost**. "No attacker would spend that much" is invalid: the budget is +pledged, not spent. [Buterin 2015, blog.ethereum.org/2015/01/28/p-epsilon-attack] + +Not theoretical: executed on Ethereum mainnet against Kleros (Doges on Trial 2018, disputeIDs +70–76, 94; conditional-bribe contracts at 0xbaf2eb...). In disputeID 75 the bribe **won rounds 1 +and 2** against small panels; reversed only when a community member funded an appeal to a fresh +14-juror round (attacker lost 0-14). 
[blog.kleros.io/cryptoeconomic-deep-dive-doges-on-trial] + +The two operative defenses are both absent from our v1: +- **Slashable deposits** (bribe must exceed deposit, not per-round reward) — rejected by design. +- **Escalating appeals** — attacker lockup grows O(N²) in panel size (~110M PNK at 2023 General + Court parameters). This is the defense that actually worked in production. + +Our bribery floor = per-task reward + discounted value of k evaluators' future reputation +streams. Partial mitigants (private dataset fraction breaks pure-Schelling structure; coordinating +a continuous median within tolerance is harder than flipping a binary vote) bound but don't +eliminate exposure. Kleros guidance: commit-reveal matters **most** when appeals are unlikely — +in a no-appeal design it is load-bearing, not optional. + +### 3. Stake is the canonical sybil defense for random sortition; attestation-only has no production precedent + +Kleros whitepaper §4.2.1: *"If jurors were simply drawn randomly, a malicious party could create a +high number of addresses... By being drawn more times than all honest jurors, the malicious party +would control the system."* Zero stake = never drawn; no reputation ladder exists in Kleros. +Device attestation + rep decay must absorb the entire sybil burden alone. The **free Shadow tier +is the attack surface**: cost-per-attested-device (emulation, device farms, resale) must exceed +the discounted value of progressing a sybil to a Full seat. + +### Bonus: base-rate guessing defeats coherence-based reputation + +Kleros production data: ~88-89% juror coherence against a ~70%-Reject outcome skew — always +voting the base rate beats random with zero effort. 
**If most bounty evaluations pass, zero-effort +"pass" votes look reputationally coherent and climb our ladder.** +[blog.kleros.io/parameterization-of-kleros-courts] + +## Mechanisms worth stealing verbatim + +| Steal | From | What it fixes | +|---|---|---| +| `hash(score, salt, address)` — bind evaluator address into the commitment | Kleros §4.3 | Commitment copy/replay between evaluators | +| Reveal-failure penalty ≥ outlier penalty | Kleros incentive system | Silent abstention as cheap exit when your committed score looks bad | +| Automated reveals (Drand time-lock) or non-reveal = penalized worst case | Bittensor CR4 | Selective revelation (validators gamed manual reveals by revealing only when it helped) | +| EV-balance parameterization: tune penalties so no-effort evaluation is EV-negative | Kleros parameterization | Lazy rubber-stamping; portable framework, our lever is rep decay instead of voteStake | +| Difficulty-weighted reputation: reward being right when others were wrong, not easy unanimity | (derived from Kleros base-rate finding) | Base-rate climbing | +| Known-fail canaries seeded into the private dataset fraction | (derived) | Makes rubber-stampers detectably wrong at a measurable rate | +| Disagreement-triggered escalation to a larger fresh panel | Kleros appeals | The only defense that beat p+epsilon in production | + +## Amendments + +**v1 (cheap, do in the ladder slice):** +1. Commitment format = `hash(score ‖ salt ‖ evaluatorAddress)`. +2. Fixed reveal window; non-reveal treated as worst-case outlier (rep penalty ≥ divergence penalty). Ladder constants in `task.yaml` gain `revealWindow` + `nonRevealPenalty`. +3. Seed `datasetCommit.privateFraction` with known-fail canaries; rotate the private fraction per round for repeatable bounty types. +4. Reputation gains weighted by disagreement/difficulty — unanimous easy agreement earns ~0; correct minority positions earn most. + +**v2 (design before cross-party):** +5. 
Disagreement-triggered escalation: if revealed scores straddle the tolerance band, re-run with a larger fresh panel (2k+1), poster pre-approves escalation budget cap at post time. Note: weaker than Kleros's version (no loser-deposit redistribution funds it) — escalation cost falls on the eval budget. +6. Quantify the bribery floor in OBOL: discounted value of a Full-tier seat's future income stream is our analog of Kleros's O(N²) lockup. Model it; if corrupting ⌈k/2⌉+1 medians costs less than plausible bounty values, raise k or value caps. +7. Drand-style time-lock reveals when cross-party (committer-controlled reveals are an exploit vector even with penalties). + +**Open questions carried forward:** +- OBOL-denominated value of a Full-tier reputation stream (the no-stake bribery floor) — unquantified. +- Is device attestation an adequate sybil bound? No production precedent exists. +- Which task-type registry entries have static-enough ground truth that commit-reveal is structurally insufficient → what rotation cadence makes copying unprofitable? +- How Truebit/Gensyn/Numerai/Chainlink handle non-deterministic verification — didn't survive this round's verification; re-research before the verifiable-compute task type ships. diff --git a/plans/servicebounty-technical-spec.md b/plans/servicebounty-technical-spec.md new file mode 100644 index 00000000..d655eec1 --- /dev/null +++ b/plans/servicebounty-technical-spec.md @@ -0,0 +1,349 @@ +# ServiceBounty + Evaluator Market + Real Escrow — Technical Specification + +> Status: implemented and live-smoked on a local k3d stack (sandbox branch). +> Scope: the demand-side bounty marketplace, the evaluator verification +> market, and the non-custodial Permit2 escrow leg, as built on obol-stack. +> Audience: engineers reviewing the design. + +## 1. System overview + +The obol stack already had a supply side: `obol sell` publishes x402 +payment-gated services (HTTP 402 micropayments, Traefik ForwardAuth, +ERC-8004 discovery). 
This work adds the demand side and the trust layer: + +``` + DISCOVERY ERC-8004 identity · /api/services.json · /skill.md + │ + ┌──────────────────────────┼──────────────────────────┐ + ▼ ▼ ▼ + SUPPLY DEMAND TRUST + ServiceOffer CRD ServiceBounty CRD EvaluatorEnrollment CRD + obol sell … obol bounty post commit-reveal quorums + x402 per-call escrowed reward reputation ladder + └──────────────────────────┼──────────────────────────┘ + ▼ + MONEY RAIL (shared) + x402-escrow facilitator: Permit2 vouchers, batch capture +``` + +One controller (`serviceoffer-controller`, `x402` namespace) reconciles all +three CRDs. The controller **never holds keys and never signs** — every +signature comes from an agent wallet (remote-signer) or the operator. + +## 2. CRDs (group `obol.org/v1alpha1`) + +### 2.1 ServiceBounty (`sb`) + +Demand-side inverse of ServiceOffer. + +Spec (abridged): +- `task.typeRef` — references an embedded task package (`benchmark@v1`, + `benchlocal@v1`, `finetune@v1` staged/disabled) with typed params + (unknown params are admission-rejected; `required` enforced). +- `reward` — amount + asset (USDC via EIP-3009 or OBOL via Permit2; asset + carries `eip712Name`/`eip712Version`), `payment.network` chain alias. +- `eval.mode` — `required` (default) | `dangerouslySkipped` + (CLI: `--dangerously-skip-verification`). Skipped bounties settle on the + poster's verdict only, produce no ERC-8004 entries, and suppress + reputation effects. +- `deliverable.report.variants[]` — A2UI report variants (see §8). + +Status: `conditions[]` are machine truth; `phase` is a rollup. Key fields: +`evaluatorPanel[]{address,seat}`, `evaluations[]{address, commitHash, +score, revealedAt, withinBand, phase, seat, paid, validationTxHash, +grounded}`, `revealDeadline`, `panelSeed{source,round,randomness,signature}`, +`escalation{…}`, `bondState`, `evalBudgetState`, `escrowSpender`, +`ladderRecorded`. 
+ +Write channel: all participant input rides **annotations** (RBAC-scopeable, +no controller API surface): + +| Annotation | Writer | Payload | +|---|---|---| +| `obol.org/claim` | fulfiller | claim intent (address) | +| `obol.org/submit` | fulfiller | submission ref | +| `obol.org/verdict` | poster | explicit override verdict | +| `obol.org/eval-commit-<address>` | evaluator | commit hash (round 0) | +| `obol.org/eval-reveal-<address>` | evaluator | `{score, salt, validationTx?}` | +| `obol.org/eval-commit-r1-<address>` / `eval-reveal-r1-<address>` | evaluator | escalation round | +| `obol.org/reward-voucher` | poster agent | Permit2 voucher JSON (§5) | +| `obol.org/bond-voucher` | fulfiller agent | Permit2 voucher JSON | +| `obol.org/eval-voucher`, `obol.org/eval-voucher-r1` | poster agent | Permit2 voucher JSON | + +Bounty reconcile is structurally pinned (test-enforced) to **never create +HTTPRoute / Middleware / ReferenceGrant / Secret / Namespace** — a bounty +can never become ingress or broker credentials. + +### 2.2 EvaluatorEnrollment (`ee`) + +Per-evaluator registration: `spec{address (0x40-hex), taskTypes[], +attestation{scheme: none|secure-enclave, publicKey, signature}}`. +Controller is **read-only on spec** (no create/delete — test-pinned); +it owns only `status.records[]` (per task type): +`{taskType, tier: shadow|probation|full, shadowAgreements, probationEvals, +completed, divergences, groundedEvals, lastEvalAt, recentFulfillers[≤5]}`. 
+ +### 2.3 Task packages (embedded, versioned) + +`task.yaml` per package declares params, eval policy and ladder constants: + +```yaml +eval: + defaultK: 3 # counting quorum size + ladder: + shadowAgreements: 5 # in-band shadow verdicts → Probation + probationEvals: 10 # clean paid evals → Full + probationValueCap: "50.00" # no probation seat above this reward + revealWindow: 10m # commits close before any reveal opens + nonRevealPenalty: outlier # non-reveal graded as worst-case outlier + decayHalfLife: 720h # reputation half-life on inactivity + escalationWindow: 30m # poster funding window for round 1 + escalationEpsilon: 5 # knife-edge band; negative disables +``` + +## 3. Lifecycle and verdicts + +``` + post ──► claim ──► submit ──► [eval market] ──► Verified | Rejected ──► Paid + │ │ │ │ + │ │ │ ├─ Verified: capture reward → fulfiller + │ │ │ │ batch-capture eval leg → panel + │ │ │ │ void bond ("Returned") + │ │ │ └─ Rejected: capture bond → poster + │ │ │ void reward; eval leg still paid + │ ├─ self-bond reserved (<uid>-bond) (win-or-lose) + │ └─ reward voucher signable (fulfiller known) + └─ escrow Reserve() — intent only, "AwaitingVoucher" +``` + +Verdict sources, in precedence order: poster override annotation +(`PosterOverride`, always explicit) > evaluator quorum (`EvaluatorQuorum`, +only when `eval.mode=required`). The quorum verdict latches: once spoken it +is never re-derived. + +## 4. Evaluator market protocol + +### 4.1 Commit–reveal quorum + +- Commitment (address-bound, anti-copy): + `commitHash = "0x" + hex(sha256("<score>|<salt>|<address>"))` + Scores are integers 0–100. First write wins per address. +- The reveal window opens only after **k counting commits** exist + (`revealDeadline = now + revealWindow`). +- Reveal verification recomputes the hash; mismatch → `BadReveal`. + Non-reveal past deadline → graded as worst-case outlier (`NonReveal`). +- Quorum verdict: `median(counting reveals) >= 50` → Verified + (`evalPassThreshold = 50`). 
A reveal further than **20** points from the + median is out-of-band (`evalOutlierBand`) — divergence for ladder + bookkeeping. + +### 4.2 Panel selection + +Deterministic, seeded weighted lottery over enrolled evaluators for the +task type: + +- Seed source (`OBOL_BOUNTY_SEED` env): + - `local` (default): `sha256(bountyUID)`. + - `drand`: `sha256(bountyUID ‖ beacon.randomness)` where the beacon round + is the first quicknet round strictly after `creationTimestamp + 30s`. + The BLS signature (bls-unchained-g1-rfc9380, G1) is verified in-process + against the quicknet group key; provenance + `{source, round, randomness, signature}` is persisted to + `status.panelSeed` for third-party re-verification. Relay failure + requeues — **no silent local fallback** (fallback would be a + seed-grinding lever). +- Weight: `w = max(0.1, 1 + 0.1 × (effectiveCompleted − divergences))` + - decay: `effectiveCompleted = completed × 2^(−idle / decayHalfLife)` + (pure read-time math; `lastEvalAt` nil → no decay; no status writes) + - grounded bonus: `w ×= 1 + min(1, groundedEvals / max(1, completed))` + - pair diversity: `w ×= 0.25` for repeat evaluator↔fulfiller pairs + (`recentFulfillers`, capped 5). +- Tier gating at read time: a `full` record is treated as probation when + `effectiveCompleted < probationEvals` and idle exceeds the half-life. +- Panel shape: **k counting seats** (Full tier) + **1 probation seat** + (counts fully in the median, half pay, only on bounties under + `probationValueCap`, requires k ≥ 3) + **≤ 2 shadow seats** (free, + randomly assigned, never counted or paid, graded against the median for + ladder credit). +- Cold start: pool smaller than k → open-door fallback (latched by the + `PanelSelected` condition); open-door participants still earn ladder + records, bootstrapping the pool. +- Selection is idempotent: the `PanelSelected` latch guarantees a panel is + never re-rolled. 
+ +### 4.3 Escalation (bribery / dispute defense) + +Trigger, checked after grading and before the quorum verdict +(`eval.mode=required`, single-round latch): +- dispersion: out-of-band counting reveals `≥ ⌈k/2⌉`, or +- knife-edge: `|median − 50| ≤ escalationEpsilon` (0 = unset → default 5; + negative disables). + +On trigger: fresh panel of **2k+1** (round-0 panel and fulfiller excluded; +seed = `sha256(round0seed ‖ "escalation-r1")`), all seats full-pay, funded +by the poster within `escalationWindow` via the `eval-voucher-r1` +annotation (escrow id `-eval-r1`). Funded → full commit-reveal cycle +with `-r1` annotation prefixes; the round-1 median over 2k+1 is **final**. +Unfunded past the deadline → `EscalationUnfunded`, round-0 median stands. + +### 4.4 ERC-8004 grounding + +Evaluators may submit `validationResponse` on-chain with their own wallets +(the CLI builds calldata; the controller never signs): + +- Canonical request hash: + `requestHash = keccak256("obol/bounty-eval/v1||")` +- ERC-8004 v2.0.0 registries (verified on-chain, `getVersion()=="2.0.0"`): + `validationResponse(bytes32,uint8,string,bytes32,string)` selector + `0x3d659a96`; `giveFeedback(...)` selector `0x3c036a7e`. + Base Sepolia ValidationRegistry: `0x8004Cb1BF31DAf7788923b405b754f57acEB4272`. +- On reveal with `validationTx`, the controller reads the registry through + in-cluster eRPC and sets `grounded=true` iff on-chain responder == + evaluator and on-chain response == revealed score. Grounding **never + blocks or changes the verdict** (chain down → ungrounded + condition + note). Grounded evals feed the selection weight bonus. + +### 4.5 Anti-griefing + +Fulfiller self-bond: reserved at claim (`-bond`), voided ("Returned") +on Verified or honest timeout, captured to the poster ("Forfeited") on +Rejected — offsets the poster's burned eval budget. + +## 5. 
Money rail — Permit2 vouchers + x402-escrow facilitator + +### 5.1 Voucher (agent-signed authorization) + +JSON object ferried via annotations: + +```json +{ + "owner": "0x…", // signer (poster or fulfiller agent wallet) + "token": "0x…", // asset contract + "network": "base-sepolia", // chain alias + "spender": "0x…", // facilitator address (signature-bound) + "nonce": "…", // uint256 decimal, deterministic (below) + "deadline": 1760000000, // unix; hard on-chain expiry + "recipients": [{"address":"0x…","amount":"…"}], // atomic units/seat + "signature": "0x…" // 65-byte EIP-712 signature +} +``` + +- EIP-712: Uniswap **Permit2 SignatureTransfer `PermitBatchTransferFrom`**. + Domain `{name:"Permit2", chainId, verifyingContract: + 0x000000000022D473030F116dDEE9F6B43aC78BA3}` (no version field). + `permitted[i] = {token, amount}` — one entry per recipient seat. +- Deterministic nonce: `uint256(keccak256(uid + "|" + leg))` with legs + `reward|bond|eval|eval-r1` — re-funding is idempotent and a consumed + nonce is unrepeatable (Permit2 unordered nonces), so a voucher cannot be + double-captured. +- Signing: agent remote-signer `SignTypedData` (REST) or a dev `--key`. +- Who signs when: reward → poster at claim (fulfiller known); bond → + fulfiller at claim; eval → poster at panel selection (seat addresses + known, probation seat at half price); eval-r1 → poster at escalation. + +### 5.2 Facilitator service (`x402-escrow`) + +In-cluster, ClusterIP-only (never tunnel-exposed), port 8403, distroless. + +| Route | Semantics | +|---|---| +| `POST /escrow/reserve/{id}` | no voucher → `{state:"AwaitingVoucher", spender}`; with voucher → verify (recover signer == owner, spender binding, future deadline, positive amounts) → `Reserved`. Re-reserve attaches/replaces a voucher pre-capture. | +| `POST /escrow/capture/{id}` | requires Reserved+voucher. 
Optional `recipients[]` body (batch): must be a **subset of the voucher's declared seats with exact amounts** — omitted seats are simply unpaid. Builds `permitBatchTransferFrom` (transferDetails pair index-wise with permitted; omitted seats get `requestedAmount=0`), submits with the facilitator wallet, waits for the receipt → `{state:"Captured", txHash}`. Idempotent. | +| `POST /escrow/void/{id}` | store-only; the voucher deadline is the hard guarantee. | +| `GET /escrow/info` | `{address, networks}` — agents fetch the spender before signing. | + +Auth: bearer token (constant-time compare). Settlement key from env +(`OBOL_ESCROW_KEY`) or a remote signer; RPC via in-cluster eRPC per +network. State: file-backed, atomic writes. + +**Custody model**: funds move owner → recipients **directly through +Permit2** in one transaction; the facilitator pays gas and is never +custodial. Loss is bounded by signed amounts + deadline. + +**Documented v1 trust residue**: Permit2 SignatureTransfer lets the +spender choose `to` on-chain, so recipient binding is enforced by +facilitator policy (stored-voucher subset rule) + namespaced RBAC on the +voucher annotations, not by the signature. Cryptographic binding requires +`permitWitnessTransferFrom` + a disperse contract (planned upgrade). A +forged/foreign voucher can never move third-party funds (signature +recovery binds the owner). + +**Controller coupling**: the controller reads escrow URL/token **only from +env** (`OBOL_BOUNTY_ESCROW_URL/TOKEN`) — never from CR spec or +annotations (exfiltration guard, test-pinned). Escrow ids: `<uid>`, +`<uid>-bond`, `<uid>-eval`, `<uid>-eval-r1`. A capture refused for a +missing voucher parks as condition `EscrowAwaitingVoucher` + requeue; +`obol bounty status` prints the exact fund command to run next. + +## 6. 
Convergence with the inference-exchange direction + +The facilitator's `/escrow/*` + batch-capture routes are deliberately the +same primitive a regional inference gateway needs to batch-settle earnings +to `obol sell inference` operators (one tx, k sellers). The bounty eval +leg and gateway payouts share this workstream; sellers need zero changes. + +## 7. CLI surface (additions) + +``` +obol bounty post [--dangerously-skip-verification] [--yes] … +obol bounty fund (--key|--signer-url) [--spender 0x…] # reward voucher +obol bounty claim … [--bond-key|--bond-signer-url] # bond voucher +obol bounty eval enroll|pool # ladder state +obol bounty eval fund # eval / eval-r1 voucher +obol bounty eval commit --address --score --salt +obol bounty eval reveal --address --score --salt [--validation-tx] +obol bounty eval calldata [--bounty --address | --request-hash] # ERC-8004, own wallet +obol bounty feedback # giveFeedback calldata +obol bounty status # seed/panel/escalation/grounding/escrow +``` + +## 8. Reports (A2UI v1.0) + +Task packages ship `deliverable.report.variants[]`: +`{kind: declarative|mcp-app, surface, catalogId}` — declarative variants +target the A2UI v1.0 basic catalog (schema-validated against the spec +repo); `mcp-app` variants are self-contained HTML served as a `custom` +node (`url_encoded:` content) — double-iframe isolation is entirely the +client's job. A free `bounty_report` MCP tool renders reports with +client-preference catalog negotiation (first supported variant wins) and +path-traversal guards. + +## 9. Security invariants (test-pinned) + +1. Controller never signs; holds no key material. +2. Escrow endpoint config from controller env only — never spec/annotations. +3. Bounty reconcile creates no HTTPRoute/Middleware/ReferenceGrant/Secret/ + Namespace (structural source scan across all bounty reconcile files). +4. Controller read-only on EvaluatorEnrollment spec; no create/delete. +5. Agent bounty/enrollment RBAC is namespaced. +6. 
CRD ↔ Go bidirectional parity test (walks every json tag against the + hand-written CRD schema; has caught real silent-pruning bugs). +7. Voucher capture ≤ signed amounts, to declared recipients only. +8. drand mode has no silent local fallback (no seed-grinding path). +9. `x402-escrow` is ClusterIP-internal; frontend/eRPC hostname + restrictions untouched. + +## 10. Validation status + +- Unit/controller: full `go build/vet/test` green, including commit-reveal, + panel, escalation, grounding, voucher-ferry, decay, drand-fixture (BLS + verify passes on a recorded beacon, fails on a flipped bit), Permit2 + golden calldata, and the parity/RBAC/structural pins. +- Live cluster smokes: eval-market quorum pass/reject; full panel mode + (3 full + 1 shadow, outsider gated, median excludes shadow, eval budget + batch-captured, shadow unpaid, ladder records written, validation-tx + provenance). +- Money-rail compatibility: flow-12 (OBOL Permit2 sell→buy→settle through + the x402-buyer sidecar) passes against the upstream-sync x402-rs + facilitator build (v1.5.6 overlay): 3 settlements `status=0x1`, exact + buyer/seller balance deltas, ~113k avg gas per settlement. + +## 11. Known gaps / next steps + +- Disperse contract + `permitWitnessTransferFrom` for signature-bound + recipients. +- Live Base Sepolia escrow smoke with deployed OBOL. +- VRF only if drand provenance proves insufficient cross-party. +- On-chain `giveFeedback` submission flow (calldata exists today). +- Frontend A2UI rendering (separate repo). +- x402-rs fork release (v1.5.6 overlay) push + image repin. diff --git a/tests/test_gh_post_no_redirect.py b/tests/test_gh_post_no_redirect.py new file mode 100644 index 00000000..6b19d5be --- /dev/null +++ b/tests/test_gh_post_no_redirect.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Unit tests for gh_post.py's no-redirect guard. 
+ +Regression suite for the token-exfiltration vector where ``_gh_request`` +used urllib's DEFAULT opener: CPython's HTTPRedirectHandler re-sends every +request header — including the bearer-token auth header — to a 3xx +redirect target, with no cross-origin stripping. A redirecting (or +attacker-influenced) GITHUB_API_BASE could therefore receive the literal +token. gh_post.py now routes every call through a module-level opener +whose ``redirect_request`` returns None (mirroring smoke.py's +``_NoRedirect``), so a 3xx is surfaced as the final status and the token +never leaves the intended endpoint. +""" +import http.server +import importlib.util +import sys +import threading +import time +import unittest +from pathlib import Path + +MODULE_PATH = ( + Path(__file__).resolve().parents[1] + / "internal" + / "embed" + / "skills" + / "smoke-test" + / "scripts" + / "gh_post.py" +) + +TOKEN = "ghp_test_secret_token_do_not_leak" + + +def load_gh_post_module(): + spec = importlib.util.spec_from_file_location("gh_post_smoke", MODULE_PATH) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +class _RecordingHandler(http.server.BaseHTTPRequestHandler): + """Records every request (method, path, Authorization header).""" + + requests = None # set per-server below + + def _record_and_respond(self, status, extra_headers=()): + self.requests.append( + (self.command, self.path, self.headers.get("Authorization")) + ) + self.send_response(status) + for name, value in extra_headers: + self.send_header(name, value) + self.send_header("Content-Length", "2") + self.end_headers() + self.wfile.write(b"{}") + + def log_message(self, *args): # keep test output clean + pass + + +def _start_server(handler_cls): + server = http.server.ThreadingHTTPServer(("127.0.0.1", 0), handler_cls) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + return 
server + + +class GhPostNoRedirectTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.gh_post = load_gh_post_module() + + # "Attacker" server on a different origin — must NEVER be contacted. + attacker_requests = [] + + class AttackerHandler(_RecordingHandler): + requests = attacker_requests + + def do_GET(self): + self._record_and_respond(200) + + do_PUT = do_GET + + cls.attacker_requests = attacker_requests + cls.attacker = _start_server(AttackerHandler) + attacker_url = "http://127.0.0.1:%d/leak" % cls.attacker.server_address[1] + + # Redirector: answers every request with 302 -> attacker origin. + redirector_requests = [] + + class RedirectorHandler(_RecordingHandler): + requests = redirector_requests + + def do_GET(self): + self._record_and_respond(302, [("Location", attacker_url)]) + + do_PUT = do_GET + + cls.redirector_requests = redirector_requests + cls.redirector = _start_server(RedirectorHandler) + cls.redirector_base = "http://127.0.0.1:%d" % cls.redirector.server_address[1] + + @classmethod + def tearDownClass(cls): + for server in (cls.attacker, cls.redirector): + server.shutdown() + server.server_close() + + def setUp(self): + del self.attacker_requests[:] + del self.redirector_requests[:] + # _put_file builds its URL from module-level API_BASE; point it at + # the redirector for the duration of each test. 
+ self._orig_api_base = self.gh_post.API_BASE + self.gh_post.API_BASE = self.redirector_base + + def tearDown(self): + self.gh_post.API_BASE = self._orig_api_base + + # ── the opener refuses redirects outright ───────────────────────────── + + def test_no_redirect_handler_returns_none(self): + handler = self.gh_post._NoRedirect() + self.assertIsNone( + handler.redirect_request(None, None, 302, "Found", {}, "http://evil") + ) + + # ── empirical: a 3xx is final and the token never crosses origins ───── + + def test_get_does_not_follow_redirect_or_leak_token(self): + status, _, _ = self.gh_post._gh_request( + "GET", self.redirector_base + "/repos/o/r/contents/x", TOKEN + ) + self.assertEqual(status, 302) + self.assertEqual( + self.attacker_requests, [], "redirect target must never be contacted" + ) + # Sanity: the intended endpoint did see the Bearer header once. + self.assertEqual(len(self.redirector_requests), 1) + self.assertEqual(self.redirector_requests[0][2], "Bearer " + TOKEN) + + def test_put_treats_redirect_as_hard_failure(self): + deadline = time.monotonic() + 5 + with self.assertRaises(self.gh_post.PostError) as ctx: + self.gh_post._put_file( + "o/r", "reports/x/y.md", "msg", b"body", None, TOKEN, deadline + ) + self.assertIn("status 302", str(ctx.exception)) + self.assertNotIn(TOKEN, str(ctx.exception)) + self.assertEqual(self.attacker_requests, []) + + +if __name__ == "__main__": + unittest.main() From 6f0998b4ff44379e47c482f628245b7165b42932 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:36:16 +0400 Subject: [PATCH 03/11] fix: harden decentralized subsystems (dataset, bounty, monetize, research, escrow) Squash of the v0.11-rc High/Critical fixes: dataset x402-verifies paid join + verifies the signed log on fetch; bounty reward-capture bound to the accepted fulfiller seat; monetize charges perMB datasets by size + federates datasetFileHash; research authenticates worker identity, caps threshold 
payouts, GCs device codes; escrow returns 409 on reserve with conflicting settlement terms. --- cmd/obol/dataset.go | 88 +++++++++----- internal/dataset/client.go | 95 +++++++++++---- internal/dataset/client_test.go | 77 +++++++----- internal/dataset/coverage_test.go | 10 +- internal/dataset/server.go | 61 +++++----- internal/dataset/server_test.go | 112 ++++++++++++------ internal/research/groupauth/groupauth.go | 30 +++++ internal/research/groupauth/groupauth_test.go | 46 +++++++ internal/research/kb/kb.go | 25 +++- internal/research/kb/kb_test.go | 29 +++++ internal/research/server/server.go | 9 +- internal/schemas/payment.go | 25 ++++ internal/schemas/payment_test.go | 33 ++++++ internal/schemas/service-catalog.schema.json | 5 + internal/schemas/service_catalog.go | 1 + internal/serviceoffercontroller/bounty.go | 18 ++- .../bounty_escalation_test.go | 11 ++ .../bounty_lifecycle_test.go | 7 ++ internal/serviceoffercontroller/render.go | 14 ++- .../serviceoffercontroller/render_test.go | 4 + internal/x402/escrow/server.go | 25 ++++ internal/x402/escrow/server_test.go | 28 +++++ internal/x402/serviceoffer_source.go | 9 +- internal/x402/serviceoffer_source_test.go | 12 +- 24 files changed, 609 insertions(+), 165 deletions(-) diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go index a0744d78..f391530d 100644 --- a/cmd/obol/dataset.go +++ b/cmd/obol/dataset.go @@ -28,14 +28,15 @@ import ( "os/exec" "os/signal" "path/filepath" - "strconv" "strings" "syscall" "time" "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/dataset" + x402 "github.com/ObolNetwork/obol-stack/internal/x402" "github.com/urfave/cli/v3" + x402types "github.com/x402-foundation/x402/go/types" ) // datasetState lets approve/status reach a running publish server. 
@@ -117,6 +118,13 @@ func appendDatasetVersion(cfg *config.Config, cmd *cli.Command, id, bundleDir st return err } log := dataset.LogFromVersions(st.Versions) + // Tamper-evidence is only real if the producer refuses to extend a chain it + // cannot verify: a rewritten earlier entry must not be silently signed over. + if len(st.Versions) > 0 { + if err := log.Verify(dataset.EthVerifier{}, signer.SignerID()); err != nil { + return fmt.Errorf("refusing to extend dataset %q: existing version log fails verification: %w", id, err) + } + } v, err := log.Append(manifestHash, fileHash, size, signer, time.Now()) if err != nil { return err @@ -149,6 +157,9 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { &cli.StringFlag{Name: "membership", Usage: "open | invite", Value: "invite"}, &cli.IntFlag{Name: "port", Usage: "Local port (0 = pick a free one)", Value: 0}, &cli.BoolFlag{Name: "no-tunnel", Usage: "Serve locally only"}, + &cli.StringFlag{Name: "price", Usage: "Per-join price in USDC (enables x402 paid join; empty = invite/open only)"}, + &cli.StringFlag{Name: "pay-to", Usage: "USDC recipient (default: the dataset owner address)"}, + &cli.StringFlag{Name: "chain", Usage: "Payment chain", Value: "base-sepolia"}, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -171,6 +182,12 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { if len(st.Versions) == 0 { return fmt.Errorf("dataset %q has no versions — run 'obol dataset from' first", id) } + // Never serve a chain we cannot verify against the owner key: a + // tampered persisted store must fail closed, not be published. 
+ pubLog := dataset.LogFromVersions(st.Versions) + if err := pubLog.Verify(dataset.EthVerifier{}, signer.SignerID()); err != nil { + return fmt.Errorf("refusing to serve dataset %q: version log fails verification: %w", id, err) + } artifacts := dataset.NewFileArtifacts() for seq, path := range st.Artifacts { artifacts.Set(seq, path) @@ -182,16 +199,42 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { if err != nil { return err } + + // Optional x402 paid join: when --price is set, gate /join/paid with + // a real facilitator-verified, in-process-settled payment for the + // join price (the owner address is the default payee). Without it, + // the dataset is invite/open-membership only — never free-on-payment. + var paidJoin func(http.Handler) http.Handler + var joinAtomic string + if price := strings.TrimSpace(cmd.String("price")); price != "" { + chain, cerr := x402.ResolveChainInfo(cmd.String("chain")) + if cerr != nil { + return fmt.Errorf("paid join: %w", cerr) + } + payTo := strings.TrimSpace(cmd.String("pay-to")) + if payTo == "" { + payTo = signer.SignerID() + } + req := x402.BuildV2Requirement(chain, price, payTo, 0) + joinAtomic = req.Amount + paidJoin = x402.NewForwardAuthMiddleware(x402.ForwardAuthConfig{ + FacilitatorURL: x402.DefaultFacilitatorURL, + VerifyOnly: false, + SettlesInProcess: true, + }, []x402types.PaymentRequirements{req}) + } + srv := dataset.NewServer(dataset.Config{ - ID: id, - Membership: cmd.String("membership"), - OwnerToken: ownerToken, - OwnerSigner: signer.SignerID(), - Log: dataset.LogFromVersions(st.Versions), - Ents: ents, - Store: store, - Artifacts: artifacts, - Payments: forwardedPayment{}, + ID: id, + Membership: cmd.String("membership"), + OwnerToken: ownerToken, + OwnerSigner: signer.SignerID(), + Log: pubLog, + Ents: ents, + Store: store, + Artifacts: artifacts, + PaidJoin: paidJoin, + JoinPriceAtomic: joinAtomic, }) ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", cmd.Int("port"))) @@ -213,7 
+256,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { } } - head, _ := dataset.LogFromVersions(st.Versions).Head() + head, _ := pubLog.Head() _ = writeDatasetState(cfg, datasetState{ID: id, LocalAddr: localAddr, PublicURL: publicURL, OwnerToken: ownerToken}) u.Successf("Dataset %q published (head version %d)", id, head.Seq) @@ -222,7 +265,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { u.Infof("Membership: %s", cmd.String("membership")) u.Blank() u.Bold("Buyers fetch with:") - u.Printf(" obol buy dataset %s --id %s --member-token ", publicURL, id) + u.Printf(" obol buy dataset %s --id %s --member-token --owner %s", publicURL, id, signer.SignerID()) if cmd.String("membership") == dataset.MembershipInvite { u.Dim("Admit a worker's printed code: obol dataset approve ") } @@ -345,6 +388,7 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { &cli.IntFlag{Name: "version", Usage: "Version to fetch (0 = head)"}, &cli.StringFlag{Name: "member-token", Usage: "Member token (owner-issued or payment-minted)", Required: true}, &cli.StringFlag{Name: "out", Usage: "Output file (default -v.jsonl)"}, + &cli.StringFlag{Name: "owner", Usage: "Expected owner 0x address that must have signed the version log (pins identity; recommended)"}, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -367,6 +411,7 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { res, err := dataset.Fetch(ctx, dataset.FetchOptions{ BaseURL: base, ID: id, Version: cmd.Int("version"), Token: cmd.String("member-token"), OutPath: out, + ExpectedOwner: strings.TrimSpace(cmd.String("owner")), }) if err != nil { return err @@ -381,25 +426,6 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { } } -// --- payment validation (behind the edge x402-verifier) --- - -// forwardedPayment trusts the edge x402-verifier to have proven a settled -// payment upstream; it extracts the paid version/amount from forwarded -// headers. 
It is only reachable on the membership-gated /join/paid route -// (never a raw public route). -type forwardedPayment struct{} - -func (forwardedPayment) Validate(r *http.Request, _ string) (int, string, error) { - if r.Header.Get("X-Payment-Response") == "" && r.Header.Get("X-Payment") == "" { - return 0, "", fmt.Errorf("no settled payment forwarded") - } - v, _ := strconv.Atoi(r.Header.Get("X-Dataset-Version")) - if v < 1 { - v = 1 - } - return v, r.Header.Get("X-Dataset-Atomic"), nil -} - // --- state + url helpers --- func datasetServeDir(cfg *config.Config) string { return filepath.Join(cfg.ConfigDir, "dataset-serve") } diff --git a/internal/dataset/client.go b/internal/dataset/client.go index 7858b115..08c2c21e 100644 --- a/internal/dataset/client.go +++ b/internal/dataset/client.go @@ -2,6 +2,7 @@ package dataset import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -27,31 +28,41 @@ type FetchOptions struct { Token string OutPath string Client *http.Client + // ExpectedOwner, when set, pins the 0x address that must have signed every + // entry in the version log. Empty still verifies signatures + chain + // linkage, but skips the owner-identity check (use it to defeat a seller + // that swapped in a different signing key). + ExpectedOwner string } // Fetch downloads a dataset version to OutPath with HTTP Range resume and -// verifies the whole-file SHA-256 against the X-Dataset-File-Hash header the -// server commits on every response. A partial OutPath+".part" from an earlier -// interrupted run is resumed rather than restarted. The verification is done -// once over the reassembled whole file (the hash is of the whole artifact, -// never a chunk). +// verifies the whole-file SHA-256 against the OWNER-SIGNED version log — not a +// response header a malicious seller controls. 
It first fetches and verifies +// the signed chain (pinning ExpectedOwner when set), takes the authoritative +// file-hash commitment from it, then downloads and compares the reassembled +// whole file to that. A partial OutPath+".part" from an earlier interrupted +// run is resumed rather than restarted. func Fetch(ctx context.Context, opts FetchOptions) (FetchResult, error) { if opts.Client == nil { opts.Client = http.DefaultClient } - part := opts.OutPath + ".part" + // Integrity is anchored in the signed log, so resolve the target version's + // signed commitment BEFORE trusting any served bytes. + want, err := resolveSignedVersion(ctx, opts) + if err != nil { + return FetchResult{}, err + } + expectedHash := strings.ToLower(want.FileHash) + + part := opts.OutPath + ".part" have := int64(0) if fi, err := os.Stat(part); err == nil { have = fi.Size() } resumed := have > 0 - url := strings.TrimSuffix(opts.BaseURL, "/") + "/dataset/" + opts.ID + "/download" - if opts.Version > 0 { - url += "?version=" + strconv.Itoa(opts.Version) - } - + url := strings.TrimSuffix(opts.BaseURL, "/") + "/dataset/" + opts.ID + "/download?version=" + strconv.Itoa(want.Seq) req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return FetchResult{}, err @@ -79,13 +90,6 @@ func Fetch(ctx context.Context, opts FetchOptions) (FetchResult, error) { return FetchResult{}, fmt.Errorf("dataset: download %s -> %d: %s", url, resp.StatusCode, strings.TrimSpace(string(body))) } - fileHash := strings.ToLower(resp.Header.Get("X-Dataset-File-Hash")) - manifestHash := strings.ToLower(resp.Header.Get("X-Dataset-Manifest-Hash")) - version, _ := strconv.Atoi(resp.Header.Get("X-Dataset-Version")) - if fileHash == "" { - return FetchResult{}, fmt.Errorf("dataset: server did not advertise X-Dataset-File-Hash; refusing unverifiable download") - } - flag := os.O_CREATE | os.O_WRONLY if have > 0 { flag |= os.O_APPEND @@ -104,18 +108,65 @@ func Fetch(ctx context.Context, opts 
FetchOptions) (FetchResult, error) { return FetchResult{}, err } - // Verify the reassembled whole file against the committed hash. + // Verify the reassembled whole file against the SIGNED commitment. got, size, err := hashFile(part) if err != nil { return FetchResult{}, err } - if got != fileHash { - return FetchResult{}, fmt.Errorf("dataset: file hash mismatch: got %s, advertised %s (corrupt or tampered)", got, fileHash) + if got != expectedHash { + return FetchResult{}, fmt.Errorf("dataset: file hash mismatch: got %s, signed version log commits %s (corrupt or tampered)", got, expectedHash) } if err := os.Rename(part, opts.OutPath); err != nil { return FetchResult{}, fmt.Errorf("dataset: finalize download: %w", err) } - return FetchResult{Version: version, ManifestHash: manifestHash, FileHash: fileHash, Bytes: size, Resumed: resumed}, nil + return FetchResult{Version: want.Seq, ManifestHash: want.ManifestHash, FileHash: expectedHash, Bytes: size, Resumed: resumed}, nil +} + +// resolveSignedVersion fetches the seller's version log, verifies the chain +// (signatures, linkage, and the pinned owner when set), and returns the +// requested version's signed entry — the authoritative file-hash commitment. 
+func resolveSignedVersion(ctx context.Context, opts FetchOptions) (DatasetVersion, error) { + url := strings.TrimSuffix(opts.BaseURL, "/") + "/dataset/" + opts.ID + "/versions" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return DatasetVersion{}, err + } + req.Header.Set("Authorization", "Bearer "+opts.Token) + + resp, err := opts.Client.Do(req) + if err != nil { + return DatasetVersion{}, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) + return DatasetVersion{}, fmt.Errorf("dataset: versions %s -> %d: %s", url, resp.StatusCode, strings.TrimSpace(string(body))) + } + + var payload struct { + Versions []DatasetVersion `json:"versions"` + } + if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&payload); err != nil { + return DatasetVersion{}, fmt.Errorf("dataset: decode versions: %w", err) + } + + log := LogFromVersions(payload.Versions) + if err := log.Verify(EthVerifier{}, opts.ExpectedOwner); err != nil { + return DatasetVersion{}, fmt.Errorf("dataset: version log failed verification: %w", err) + } + + if opts.Version > 0 { + v, ok := log.Get(opts.Version) + if !ok { + return DatasetVersion{}, fmt.Errorf("dataset: version %d not present in signed log", opts.Version) + } + return v, nil + } + h, ok := log.Head() + if !ok { + return DatasetVersion{}, fmt.Errorf("dataset: signed version log is empty") + } + return h, nil } // VerifyFile recomputes a file's SHA-256 and compares it to want. 
diff --git a/internal/dataset/client_test.go b/internal/dataset/client_test.go index 9f237a3f..d73ca864 100644 --- a/internal/dataset/client_test.go +++ b/internal/dataset/client_test.go @@ -3,7 +3,6 @@ package dataset import ( "bytes" "context" - "net/http" "net/http/httptest" "os" "path/filepath" @@ -11,17 +10,18 @@ import ( ) func TestFetch_DownloadsAndVerifies(t *testing.T) { - ts := newTestServer(t, MembershipOpen, nil) + ts := newTestServer(t, MembershipOpen, passGate) httpSrv := httptest.NewServer(ts.srv.Handler()) defer httpSrv.Close() out := filepath.Join(t.TempDir(), "ds-v1.jsonl") res, err := Fetch(context.Background(), FetchOptions{ - BaseURL: httpSrv.URL, - ID: "ds", - Version: 1, - Token: ownerToken, // owner is a download superuser - OutPath: out, + BaseURL: httpSrv.URL, + ID: "ds", + Version: 1, + Token: ownerToken, // owner is a download superuser + OutPath: out, + ExpectedOwner: ts.signer.SignerID(), }) if err != nil { t.Fatalf("Fetch: %v", err) @@ -45,7 +45,7 @@ func TestFetch_DownloadsAndVerifies(t *testing.T) { } func TestFetch_ResumesFromPartial(t *testing.T) { - ts := newTestServer(t, MembershipOpen, nil) + ts := newTestServer(t, MembershipOpen, passGate) httpSrv := httptest.NewServer(ts.srv.Handler()) defer httpSrv.Close() @@ -57,6 +57,7 @@ func TestFetch_ResumesFromPartial(t *testing.T) { res, err := Fetch(context.Background(), FetchOptions{ BaseURL: httpSrv.URL, ID: "ds", Version: 1, Token: ownerToken, OutPath: out, + ExpectedOwner: ts.signer.SignerID(), }) if err != nil { t.Fatalf("Fetch: %v", err) @@ -70,36 +71,58 @@ func TestFetch_ResumesFromPartial(t *testing.T) { } } -func TestFetch_RejectsHashMismatch(t *testing.T) { - // A malicious/buggy server that serves the wrong bytes but advertises the - // real hash must be caught by the whole-file verification. 
+func TestFetch_RejectsTamperedBytesAgainstSignedLog(t *testing.T) { + // The signed version log commits the REAL hash, but the server serves + // different bytes (and would happily advertise the real hash in a header). + // Integrity is anchored in the signed log, so the swap is caught. + signer := newTestSigner(t) real := []byte("the-real-bytes\n") - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("X-Dataset-File-Hash", sha256hex(real)) - w.Header().Set("X-Dataset-Version", "1") - _, _ = w.Write([]byte("TAMPERED-DIFFERENT-BYTES\n")) - })) - defer srv.Close() + log := NewLog() + if _, err := log.Append(hashA, sha256hex(real), int64(len(real)), signer, fixedTime); err != nil { + t.Fatalf("append: %v", err) + } + srv := NewServer(Config{ + ID: "ds", Membership: MembershipOpen, OwnerToken: ownerToken, + OwnerSigner: signer.SignerID(), + Log: log, + Ents: NewEntitlements(), + Store: NewStore(filepath.Join(t.TempDir(), "ds.json")), + Artifacts: memArtifacts{data: map[int][]byte{1: []byte("TAMPERED-DIFFERENT-BYTES\n")}}, + PaidJoin: passGate, + }) + httpSrv := httptest.NewServer(srv.Handler()) + defer httpSrv.Close() out := filepath.Join(t.TempDir(), "ds.jsonl") - _, err := Fetch(context.Background(), FetchOptions{BaseURL: srv.URL, ID: "ds", Version: 1, Token: "t", OutPath: out}) + _, err := Fetch(context.Background(), FetchOptions{ + BaseURL: httpSrv.URL, ID: "ds", Version: 1, Token: ownerToken, OutPath: out, + ExpectedOwner: signer.SignerID(), + }) if err == nil { - t.Fatal("Fetch accepted bytes that don't match the advertised hash") + t.Fatal("Fetch accepted bytes that don't match the signed version log") } if _, statErr := os.Stat(out); !os.IsNotExist(statErr) { t.Error("a failed verification must not leave a finalized output file") } } -func TestFetch_RefusesUnverifiableDownload(t *testing.T) { - // No X-Dataset-File-Hash -> refuse (don't write an unverifiable file). 
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - _, _ = w.Write([]byte("anything")) - })) - defer srv.Close() +func TestFetch_RejectsWrongExpectedOwner(t *testing.T) { + // A seller that swapped in a different signing key is caught by pinning the + // expected owner: every entry's recovered signer fails the identity check. + ts := newTestServer(t, MembershipOpen, passGate) + httpSrv := httptest.NewServer(ts.srv.Handler()) + defer httpSrv.Close() + out := filepath.Join(t.TempDir(), "ds.jsonl") - if _, err := Fetch(context.Background(), FetchOptions{BaseURL: srv.URL, ID: "ds", Token: "t", OutPath: out}); err == nil { - t.Error("Fetch accepted a download with no file-hash commitment") + _, err := Fetch(context.Background(), FetchOptions{ + BaseURL: httpSrv.URL, ID: "ds", Version: 1, Token: ownerToken, OutPath: out, + ExpectedOwner: "0x000000000000000000000000000000000000dead", + }) + if err == nil { + t.Fatal("Fetch accepted a version log signed by an unexpected owner") + } + if _, statErr := os.Stat(out); !os.IsNotExist(statErr) { + t.Error("a failed owner check must not leave a finalized output file") } } diff --git a/internal/dataset/coverage_test.go b/internal/dataset/coverage_test.go index ae5e4e67..47b15dad 100644 --- a/internal/dataset/coverage_test.go +++ b/internal/dataset/coverage_test.go @@ -100,16 +100,16 @@ func TestServer_ErrorPaths(t *testing.T) { t.Errorf("= %d, want 503", w.Code) } }) - t.Run("join paid rejects payment error", func(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{err: errFake}) - w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid", "", nil) + t.Run("join paid fails closed when the gate rejects", func(t *testing.T) { + ts := newTestServer(t, MembershipInvite, gateOnHeader) // no X-Test-Paid -> 402 + w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid?version=1", "", nil) if w.Code != http.StatusPaymentRequired { t.Errorf("= %d, want 402", w.Code) } }) 
t.Run("join paid unknown version", func(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{version: 99}) - w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid", "", nil) + ts := newTestServer(t, MembershipInvite, passGate) + w := do(t, ts.srv.Handler(), "POST", "/dataset/ds/join/paid?version=99", "", nil) if w.Code != http.StatusBadRequest { t.Errorf("= %d, want 400", w.Code) } diff --git a/internal/dataset/server.go b/internal/dataset/server.go index 8fb1325c..97963882 100644 --- a/internal/dataset/server.go +++ b/internal/dataset/server.go @@ -18,15 +18,6 @@ const ( MembershipInvite = "invite" ) -// PaymentValidator validates a forwarded proof-of-payment for the paid-join -// path. It runs ONLY behind the edge x402-verifier ForwardAuth (which has -// already proven a settled payment); its job is to confirm the payment binds -// to THIS dataset offer and to extract which version + atomic amount was paid. -// It must never be exposed as a raw public route. -type PaymentValidator interface { - Validate(r *http.Request, offerID string) (version int, atomic string, err error) -} - // Config builds a Server. Log/Ents/Store/Artifacts are owned by the caller so // the CLI can rehydrate them from disk before serving. type Config struct { @@ -39,8 +30,15 @@ type Config struct { Ents *Entitlements Store *Store Artifacts Artifacts - Payments PaymentValidator - Logger *slog.Logger + // PaidJoin, when non-nil, wraps the /dataset/{id}/join/paid route with an + // x402 payment gate (verify + in-process settle). Reaching the handler + // therefore means a real on-chain payment occurred — the handler never + // trusts client-supplied payment/version headers. nil disables paid join. + PaidJoin func(http.Handler) http.Handler + // JoinPriceAtomic is the atomic-unit join price recorded on a paid + // entitlement (the gate enforces it; this value is for the ledger only). 
+ JoinPriceAtomic string + Logger *slog.Logger } // Server hosts one versioned dataset over an owner-run, membership-gated HTTP @@ -56,7 +54,8 @@ type Server struct { ents *Entitlements store *Store artifacts Artifacts - payments PaymentValidator + paidJoin func(http.Handler) http.Handler + joinAtomic string logger *slog.Logger } @@ -90,7 +89,8 @@ func NewServer(cfg Config) *Server { ents: cfg.Ents, store: cfg.Store, artifacts: cfg.Artifacts, - payments: cfg.Payments, + paidJoin: cfg.PaidJoin, + joinAtomic: cfg.JoinPriceAtomic, logger: cfg.Logger, } // Rehydrate groupauth from persisted entitlements (verified by hash; the @@ -111,9 +111,16 @@ func (s *Server) Handler() http.Handler { mux.HandleFunc("POST /auth/device/token", s.handleDeviceToken) mux.HandleFunc("POST /auth/device/approve", s.ownerOnly(s.handleApprove)) - // Paid join — payment mints a version-scoped member token. Behind the - // edge x402-verifier ForwardAuth, never a raw public route. - mux.HandleFunc("POST /dataset/{id}/join/paid", s.handleJoinPaid) + // Paid join — the x402 gate proves (and settles) a real on-chain payment + // before the handler mints a version-scoped member token. With no gate + // configured the route is disabled, not open: paid join fails closed. + if s.paidJoin != nil { + mux.Handle("POST /dataset/{id}/join/paid", s.paidJoin(http.HandlerFunc(s.handleJoinPaid))) + } else { + mux.HandleFunc("POST /dataset/{id}/join/paid", func(w http.ResponseWriter, _ *http.Request) { + writeErr(w, http.StatusServiceUnavailable, "paid_join_disabled", "paid join not configured (publish with --price)") + }) + } // Member-gated reads. 
mux.HandleFunc("GET /dataset/{id}/versions", s.member(s.handleVersions)) @@ -206,17 +213,13 @@ func (s *Server) handleJoinPaid(w http.ResponseWriter, r *http.Request) { writeErr(w, http.StatusNotFound, "unknown_dataset", "no such dataset on this host") return } - if s.payments == nil { - writeErr(w, http.StatusServiceUnavailable, "paid_join_disabled", "paid join not configured") - return - } - version, atomic, err := s.payments.Validate(r, s.id) - if err != nil { - writeErr(w, http.StatusPaymentRequired, "payment_required", err.Error()) - return - } - if _, ok := s.log.Get(version); !ok { - writeErr(w, http.StatusBadRequest, "unknown_version", fmt.Sprintf("version %d not published", version)) + // Reaching here means the x402 gate wrapping this route has already + // verified (and is settling) a real on-chain payment for the join price. + // The version to entitle is taken from the request URL, validated against + // the published versions — NEVER from a client-supplied trust header. + version, ok := s.resolveVersion(r) + if !ok { + writeErr(w, http.StatusBadRequest, "unknown_version", "requested version is not published") return } raw, hash, err := s.auth.Mint(s.groupID, "paid-v"+strconv.Itoa(version)) @@ -228,11 +231,11 @@ func (s *Server) handleJoinPaid(w http.ResponseWriter, r *http.Request) { TokenHash: hash, GroupID: s.groupID, MaxVersion: version, - PaidAtomic: atomic, + PaidAtomic: s.joinAtomic, Label: "paid", }) s.persist() - s.logger.Info("paid join", "dataset", s.id, "version", version, "atomic", atomic) + s.logger.Info("paid join", "dataset", s.id, "version", version, "atomic", s.joinAtomic) writeJSON(w, http.StatusOK, map[string]any{"token": raw, "version": version}) } diff --git a/internal/dataset/server_test.go b/internal/dataset/server_test.go index 9e898a68..21d92f3d 100644 --- a/internal/dataset/server_test.go +++ b/internal/dataset/server_test.go @@ -16,15 +16,22 @@ import ( const ownerToken = "owner-secret-token" -// fakePayments stands in for 
the edge x402-verifier's forwarded proof. -type fakePayments struct { - version int - atomic string - err error -} - -func (f fakePayments) Validate(_ *http.Request, _ string) (int, string, error) { - return f.version, f.atomic, f.err +// passGate is a stub paid-join gate that treats every request as already paid. +// The real x402 verify/settle path is covered by the inference-gateway tests; +// here we exercise the server's post-payment behaviour (version from the URL, +// never from a client header) and that the route is gated at all. +func passGate(next http.Handler) http.Handler { return next } + +// gateOnHeader simulates the x402 gate: it passes only when X-Test-Paid is set, +// otherwise returns 402 — so a test can prove the route fails closed. +func gateOnHeader(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("X-Test-Paid") != "yes" { + writeErr(w, http.StatusPaymentRequired, "payment_required", "pay first") + return + } + next.ServeHTTP(w, r) + }) } // memArtifacts serves version bytes from memory (seekable for Range). 
@@ -50,7 +57,7 @@ type testServer struct { store *Store } -func newTestServer(t *testing.T, membership string, payments PaymentValidator) testServer { +func newTestServer(t *testing.T, membership string, gate func(http.Handler) http.Handler) testServer { t.Helper() signer := newTestSigner(t) artifact := []byte(`{"messages":[{"role":"user","content":"hi"}]}` + "\n") @@ -62,15 +69,16 @@ func newTestServer(t *testing.T, membership string, payments PaymentValidator) t } store := NewStore(filepath.Join(t.TempDir(), "ds.json")) srv := NewServer(Config{ - ID: "ds", - Membership: membership, - OwnerToken: ownerToken, - OwnerSigner: signer.SignerID(), - Log: log, - Ents: NewEntitlements(), - Store: store, - Artifacts: memArtifacts{data: map[int][]byte{1: artifact}}, - Payments: payments, + ID: "ds", + Membership: membership, + OwnerToken: ownerToken, + OwnerSigner: signer.SignerID(), + Log: log, + Ents: NewEntitlements(), + Store: store, + Artifacts: memArtifacts{data: map[int][]byte{1: artifact}}, + PaidJoin: gate, + JoinPriceAtomic: "1000", }) return testServer{srv: srv, bytesV1: artifact, signer: signer, store: store} } @@ -90,7 +98,7 @@ func do(t *testing.T, h http.Handler, method, target, token string, hdr map[stri } func TestServer_PaidJoinThenDownload(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: "1000"}) + ts := newTestServer(t, MembershipInvite, passGate) h := ts.srv.Handler() // Pay -> mint a version-1 token. @@ -129,7 +137,7 @@ func TestServer_PaidJoinThenDownload(t *testing.T) { } func TestServer_VersionScopeEnforced(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: "1000"}) + ts := newTestServer(t, MembershipInvite, passGate) h := ts.srv.Handler() // Append a v2 to the log so ?version=2 is a real (but unpaid) version. 
@@ -137,7 +145,7 @@ func TestServer_VersionScopeEnforced(t *testing.T) { t.Fatalf("append v2: %v", err) } - w := do(t, h, "POST", "/dataset/ds/join/paid", "", nil) // pays for v1 + w := do(t, h, "POST", "/dataset/ds/join/paid?version=1", "", nil) // pays for v1 (version from URL) var join struct{ Token string } _ = json.Unmarshal(w.Body.Bytes(), &join) @@ -156,7 +164,7 @@ func TestServer_VersionScopeEnforced(t *testing.T) { } func TestServer_RangeReturns206WithWholeFileHash(t *testing.T) { - ts := newTestServer(t, MembershipOpen, nil) + ts := newTestServer(t, MembershipOpen, passGate) h := ts.srv.Handler() token := ownerToken // owner is a download superuser @@ -174,7 +182,7 @@ func TestServer_RangeReturns206WithWholeFileHash(t *testing.T) { } func TestServer_GatesRejectNonMembersAndAnonymous(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{version: 1}) + ts := newTestServer(t, MembershipInvite, passGate) h := ts.srv.Handler() if w := do(t, h, "GET", "/dataset/ds/download?version=1", "", nil); w.Code != http.StatusUnauthorized { @@ -192,7 +200,7 @@ func TestServer_GatesRejectNonMembersAndAnonymous(t *testing.T) { } func TestServer_DeviceAuthAdmitGetsHeadAccess(t *testing.T) { - ts := newTestServer(t, MembershipOpen, nil) // open: auto-approved on code request + ts := newTestServer(t, MembershipOpen, passGate) // open: auto-approved on code request h := ts.srv.Handler() // device code (auto-approved) -> token @@ -219,7 +227,7 @@ func TestServer_DeviceAuthAdmitGetsHeadAccess(t *testing.T) { } func TestServer_VerifyReportsChainHealth(t *testing.T) { - ts := newTestServer(t, MembershipOpen, nil) + ts := newTestServer(t, MembershipOpen, passGate) h := ts.srv.Handler() w := do(t, h, "GET", "/dataset/ds/verify", ownerToken, nil) @@ -237,7 +245,7 @@ func TestServer_VerifyReportsChainHealth(t *testing.T) { } func TestServer_RehydratesPaidMemberAfterRestart(t *testing.T) { - ts := newTestServer(t, MembershipInvite, fakePayments{version: 1, atomic: 
"1000"}) + ts := newTestServer(t, MembershipInvite, passGate) h := ts.srv.Handler() jw := do(t, h, "POST", "/dataset/ds/join/paid", "", nil) @@ -254,12 +262,13 @@ func TestServer_RehydratesPaidMemberAfterRestart(t *testing.T) { } restarted := NewServer(Config{ ID: "ds", Membership: MembershipInvite, OwnerToken: ownerToken, - OwnerSigner: ts.signer.SignerID(), - Log: LogFromVersions(st.Versions), - Ents: loadEnts(st.Entitlements), - Store: ts.store, - Artifacts: memArtifacts{data: map[int][]byte{1: ts.bytesV1}}, - Payments: fakePayments{version: 1}, + OwnerSigner: ts.signer.SignerID(), + Log: LogFromVersions(st.Versions), + Ents: loadEnts(st.Entitlements), + Store: ts.store, + Artifacts: memArtifacts{data: map[int][]byte{1: ts.bytesV1}}, + PaidJoin: passGate, + JoinPriceAtomic: "1000", }) // The pre-restart token still works — the member did not have to re-pay. @@ -268,6 +277,43 @@ func TestServer_RehydratesPaidMemberAfterRestart(t *testing.T) { } } +// TestServer_PaidJoinGatedAndVersionFromURL is the C1 regression: the paid-join +// route fails closed without the x402 gate's approval, and the minted version +// is taken from the URL — a spoofed X-Dataset-Version header is ignored. +func TestServer_PaidJoinGatedAndVersionFromURL(t *testing.T) { + ts := newTestServer(t, MembershipInvite, gateOnHeader) + h := ts.srv.Handler() + // Append a v2 so head=2; we still pay for v1 via the URL. + if _, err := ts.srv.log.Append(hashB, hashB, 5, ts.signer, fixedTime); err != nil { + t.Fatalf("append v2: %v", err) + } + + // No payment -> the gate fails closed (NOT a free token). + if w := do(t, h, "POST", "/dataset/ds/join/paid?version=1", "", nil); w.Code != http.StatusPaymentRequired { + t.Fatalf("unpaid join = %d, want 402", w.Code) + } + + // Paid: version is taken from the URL (=1); a spoofed X-Dataset-Version:99 + // header must be ignored (this was the header-trust bug). 
+ w := do(t, h, "POST", "/dataset/ds/join/paid?version=1", "", + map[string]string{"X-Test-Paid": "yes", "X-Dataset-Version": "99"}) + if w.Code != http.StatusOK { + t.Fatalf("paid join = %d, body %s", w.Code, w.Body.String()) + } + var join struct { + Token string `json:"token"` + Version int `json:"version"` + } + _ = json.Unmarshal(w.Body.Bytes(), &join) + if join.Version != 1 { + t.Fatalf("minted version = %d, want 1 (from URL, not the spoofed header 99)", join.Version) + } + // The v1-scoped token must still not reach v2. + if dw := do(t, h, "GET", "/dataset/ds/download?version=2", join.Token, nil); dw.Code != http.StatusForbidden { + t.Errorf("v1-paid token download v2 = %d, want 403", dw.Code) + } +} + func loadEnts(ents []Entitlement) *Entitlements { e := NewEntitlements() e.Load(ents) diff --git a/internal/research/groupauth/groupauth.go b/internal/research/groupauth/groupauth.go index 146b1cc2..4796e27b 100644 --- a/internal/research/groupauth/groupauth.go +++ b/internal/research/groupauth/groupauth.go @@ -121,6 +121,11 @@ func (a *Authority) RequestCode(workerID string) (CodeGrant, error) { a.mu.Lock() defer a.mu.Unlock() + // Drop abandoned/expired codes so they can't accumulate unboundedly: a + // long-lived program server is reachable over a public tunnel, and every + // un-polled login would otherwise live forever — a cheap memory-growth DoS. + a.sweepExpiredLocked() + dcVal, err := randomHex(32) if err != nil { return CodeGrant{}, err @@ -157,6 +162,19 @@ func (a *Authority) RequestCode(workerID string) (CodeGrant, error) { }, nil } +// sweepExpiredLocked removes device codes past their expiry from both indexes. +// Caller holds a.mu. O(live codes); since expired entries are deleted, the maps +// stay bounded by the codes requested within one CodeExpiry window. 
+func (a *Authority) sweepExpiredLocked() { + now := a.now() + for dcv, dc := range a.byDevice { + if now.After(dc.ExpiresAt) { + delete(a.byDevice, dcv) + delete(a.byUser, dc.UserCode) + } + } +} + // Approve is the membership decision: the program owner links a pending // user_code to the group. Only the owner calls this (it is the gate that // makes the knowledge base private to the group). user_code matching is @@ -246,6 +264,18 @@ func (a *Authority) Revoke(rawToken string) { // same hash the Authority stores without ever holding the raw token. func HashToken(rawToken string) string { return hashToken(rawToken) } +// WorkerID returns a stable, non-secret worker identity derived from a raw +// member token. A service derives the submitter's identity from the +// authenticated token rather than trusting a self-declared name, so a member +// cannot submit (or be paid) as another worker. Empty token -> "". +func WorkerID(rawToken string) string { + rawToken = strings.TrimSpace(rawToken) + if rawToken == "" { + return "" + } + return "w-" + hashToken(rawToken)[:16] +} + // Mint issues a member token for groupID WITHOUT the device-auth flow, for // services where a settled payment — not an owner approval — is the // membership decision. The raw token is returned exactly once; only its hash diff --git a/internal/research/groupauth/groupauth_test.go b/internal/research/groupauth/groupauth_test.go index 421b1cca..f9a5ee5b 100644 --- a/internal/research/groupauth/groupauth_test.go +++ b/internal/research/groupauth/groupauth_test.go @@ -96,3 +96,49 @@ func TestVerify_UnknownToken(t *testing.T) { t.Error("unknown token must not verify") } } + +func TestRequestCode_SweepsExpired(t *testing.T) { + a := New() + clk := time.Now() + a.now = func() time.Time { return clk } + + g, err := a.RequestCode("w1") + if err != nil { + t.Fatalf("RequestCode: %v", err) + } + // Let the first code expire, then request another — the sweep evicts it. 
+ clk = clk.Add(CodeExpiry + time.Minute) + if _, err := a.RequestCode("w2"); err != nil { + t.Fatalf("RequestCode 2: %v", err) + } + + // The expired code is gone from the indexes (not merely flagged expired). + if _, err := a.Poll(g.DeviceCode); err != ErrNotFound { + t.Fatalf("expired code poll = %v, want ErrNotFound (swept)", err) + } + a.mu.Lock() + n := len(a.byDevice) + len(a.byUser) + a.mu.Unlock() + if n != 2 { // only the live w2 code remains, in both indexes + t.Fatalf("index entries = %d, want 2 (expired code swept)", n) + } +} + +func TestWorkerID(t *testing.T) { + id := WorkerID("tok-abc") + if id == "" || id != WorkerID("tok-abc") { + t.Fatalf("WorkerID not stable for the same token: %q", id) + } + if WorkerID("tok-abc") == WorkerID("tok-xyz") { + t.Fatal("distinct tokens must produce distinct WorkerIDs (no impersonation)") + } + if id == "tok-abc" || strings.Contains(id, "tok-abc") { + t.Fatalf("WorkerID %q leaked the raw token", id) + } + if WorkerID("") != "" { + t.Fatal("empty token must yield empty WorkerID") + } + if !strings.HasPrefix(id, "w-") { + t.Fatalf("WorkerID = %q, want a w- prefix", id) + } +} diff --git a/internal/research/kb/kb.go b/internal/research/kb/kb.go index adf0690d..b1b815b8 100644 --- a/internal/research/kb/kb.go +++ b/internal/research/kb/kb.go @@ -218,13 +218,28 @@ func (k *KB) Payouts() map[string]float64 { } // ByImpact (default): proportional to accepted impact. - total := 0.0 per := map[string]float64{} - for _, r := range k.results { - if r.Accepted && r.Impact > 0 { - per[r.Worker] += r.Impact - total += r.Impact + if k.prog.Criteria.Accept == Threshold { + // Threshold mode accepts EVERY passing result, so summing per-worker + // lets a worker inflate their share by resubmitting the same passing + // result. Credit each worker's BEST accepted impact only. 
(BeatsChampion + // is already safe: a duplicate is never better than the champion, so it + // is not accepted — there the sum reflects genuine cumulative progress.) + for _, r := range k.results { + if r.Accepted && r.Impact > per[r.Worker] { + per[r.Worker] = r.Impact + } } + } else { + for _, r := range k.results { + if r.Accepted && r.Impact > 0 { + per[r.Worker] += r.Impact + } + } + } + total := 0.0 + for _, imp := range per { + total += imp } if total <= 0 { return out diff --git a/internal/research/kb/kb_test.go b/internal/research/kb/kb_test.go index a6985c92..6159c7eb 100644 --- a/internal/research/kb/kb_test.go +++ b/internal/research/kb/kb_test.go @@ -17,6 +17,35 @@ func minimizeProg(split SplitMode, baseline *float64) Program { } } +func thresholdProg(split SplitMode, threshold *float64) Program { + return Program{ + ID: "threshold-prog", + Criteria: Criteria{Metric: "val_bpb", Direction: Minimize, Accept: Threshold, Threshold: threshold}, + Pool: 100, + Split: split, + } +} + +// TestPayouts_ThresholdResubmitCannotInflate is the H4 regression: under +// Threshold + ByImpact, resubmitting the same passing result must NOT inflate a +// worker's share. Each worker is credited their BEST accepted impact, not the +// sum of duplicates. +func TestPayouts_ThresholdResubmitCannotInflate(t *testing.T) { + k := New(thresholdProg(ByImpact, f(1.20))) // minimize: value <= 1.20 passes + + a1, _ := k.Submit("spark1", 1.10, "") // spark1 clears it... + a2, _ := k.Submit("spark1", 1.10, "") // ...and resubmits the SAME result. + b1, _ := k.Submit("spark2", 1.10, "") // spark2 clears it once, equal impact. 
+ if !a1.Accepted || !a2.Accepted || !b1.Accepted { + t.Fatalf("threshold submissions = %+v %+v %+v, want all accepted", a1, a2, b1) + } + + pay := k.Payouts() + if math.Abs(pay["spark1"]-50) > 1e-3 || math.Abs(pay["spark2"]-50) > 1e-3 { + t.Fatalf("payouts = %+v, want 50/50 — resubmission must not inflate (was 66.67/33.33)", pay) + } +} + func TestSubmit_BeatsChampion_Minimize(t *testing.T) { k := New(minimizeProg(ByImpact, f(1.20))) // baseline val_bpb 1.20 diff --git a/internal/research/server/server.go b/internal/research/server/server.go index 9da23241..f17ebe13 100644 --- a/internal/research/server/server.go +++ b/internal/research/server/server.go @@ -157,7 +157,6 @@ func (s *Server) handleChampion(w http.ResponseWriter, _ *http.Request) { func (s *Server) handleResults(w http.ResponseWriter, r *http.Request) { var body struct { - Worker string `json:"worker"` Value float64 `json:"value"` Output string `json:"output"` } @@ -165,13 +164,17 @@ func (s *Server) handleResults(w http.ResponseWriter, r *http.Request) { writeErr(w, http.StatusBadRequest, "invalid_request", "bad result body") return } - res, err := s.store.Submit(body.Worker, body.Value, body.Output) + // Worker identity is the AUTHENTICATED token, never a self-declared body + // field — otherwise any member could submit (and be paid out) as another + // worker. The member middleware has already validated this token. 
+ worker := groupauth.WorkerID(bearer(r)) + res, err := s.store.Submit(worker, body.Value, body.Output) if err != nil { writeErr(w, http.StatusBadRequest, "invalid_result", err.Error()) return } s.log.Info("result submitted", - "worker", body.Worker, "value", body.Value, + "worker", worker, "value", body.Value, "accepted", res.Accepted, "champion", res.Champion, "impact", res.Impact) writeJSON(w, http.StatusOK, res) } diff --git a/internal/schemas/payment.go b/internal/schemas/payment.go index 21d96a0c..62111bad 100644 --- a/internal/schemas/payment.go +++ b/internal/schemas/payment.go @@ -212,3 +212,28 @@ func ApproximateRequestPriceFromPerHour(perHour string) (string, error) { return value.Mul(approxMinutesPerRequestDecimal).Div(minutesPerHour).String(), nil } + +// MebibyteBytes is the divisor for perMB dataset pricing: one "MB" of dataset +// pricing is a mebibyte (2^20 bytes), matching how artifact file sizes are +// reported. Documented so buyers and sellers agree on the unit. +const MebibyteBytes = 1 << 20 + +var mebibyteBytesDecimal = decimal.NewFromInt(MebibyteBytes) + +// TotalPriceFromPerMB converts a per-megabyte dataset price into the TOTAL +// per-request x402 charge for the whole artifact: perMB * (sizeBytes / 2^20). +// A dataset is bought once, so the enforced request price is the full total — +// returning the raw perMB would charge a single megabyte's worth for the whole +// dataset. An empty price or non-positive size yields "0". 
+func TotalPriceFromPerMB(perMB string, sizeBytes int64) (string, error) { + perMB = strings.TrimSpace(perMB) + if perMB == "" || sizeBytes <= 0 { + return "0", nil + } + value, err := decimal.NewFromString(perMB) + if err != nil { + return "", err + } + megabytes := decimal.NewFromInt(sizeBytes).Div(mebibyteBytesDecimal) + return value.Mul(megabytes).String(), nil +} diff --git a/internal/schemas/payment_test.go b/internal/schemas/payment_test.go index 109966ab..8208a60c 100644 --- a/internal/schemas/payment_test.go +++ b/internal/schemas/payment_test.go @@ -87,6 +87,39 @@ func TestApproximateRequestPriceFromPerMTok_Invalid(t *testing.T) { } } +func TestTotalPriceFromPerMB(t *testing.T) { + const mib = 1 << 20 + // 100 MiB at 0.01/MB → 1.00 total, NOT 0.01 (the bug was returning raw perMB). + got, err := TotalPriceFromPerMB("0.01", 100*mib) + if err != nil { + t.Fatalf("TotalPriceFromPerMB() error = %v", err) + } + if got != "1" { + t.Errorf("TotalPriceFromPerMB(0.01, 100MiB) = %q, want %q", got, "1") + } + // 1000 MiB at 0.01/MB → 10. + if got, _ := TotalPriceFromPerMB("0.01", 1000*mib); got != "10" { + t.Errorf("TotalPriceFromPerMB(0.01, 1000MiB) = %q, want %q", got, "10") + } + // Exactly 1 MiB → the raw perMB (the boundary the old test pinned). + if got, _ := TotalPriceFromPerMB("0.01", mib); got != "0.01" { + t.Errorf("TotalPriceFromPerMB(0.01, 1MiB) = %q, want %q", got, "0.01") + } + // Empty price or non-positive size → "0". 
+ if got, _ := TotalPriceFromPerMB("", 100); got != "0" { + t.Errorf("empty perMB = %q, want 0", got) + } + if got, _ := TotalPriceFromPerMB("0.01", 0); got != "0" { + t.Errorf("zero size = %q, want 0", got) + } +} + +func TestTotalPriceFromPerMB_Invalid(t *testing.T) { + if _, err := TotalPriceFromPerMB("bad", 100); err == nil { + t.Fatal("TotalPriceFromPerMB() error = nil, want non-nil") + } +} + func TestPaymentTerms_JSONRoundTrip(t *testing.T) { original := PaymentTerms{ Network: "base-sepolia", diff --git a/internal/schemas/service-catalog.schema.json b/internal/schemas/service-catalog.schema.json index 1184341a..91ff2c85 100644 --- a/internal/schemas/service-catalog.schema.json +++ b/internal/schemas/service-catalog.schema.json @@ -173,6 +173,11 @@ "minLength": 1, "description": "type=dataset only: content-address anchor (export bundle manifestHash, SHA-256) of the sold artifact." }, + "datasetFileHash": { + "type": "string", + "minLength": 1, + "description": "type=dataset only: SHA-256 of the served artifact file, so a router-discovered buyer can verify downloaded bytes offline." + }, "datasetVersion": { "type": "string", "minLength": 1, diff --git a/internal/schemas/service_catalog.go b/internal/schemas/service_catalog.go index 5a1baef7..82773e06 100644 --- a/internal/schemas/service_catalog.go +++ b/internal/schemas/service_catalog.go @@ -60,6 +60,7 @@ type ServiceCatalogEntry struct { // know exactly which artifact (and version) an offer sells. Additive // only — see the stable-wire-schema note above. 
DatasetManifestHash string `json:"datasetManifestHash,omitempty"` + DatasetFileHash string `json:"datasetFileHash,omitempty"` DatasetVersion string `json:"datasetVersion,omitempty"` DatasetSizeBytes int64 `json:"datasetSizeBytes,omitempty"` } diff --git a/internal/serviceoffercontroller/bounty.go b/internal/serviceoffercontroller/bounty.go index 5c89afc2..57b2783e 100644 --- a/internal/serviceoffercontroller/bounty.go +++ b/internal/serviceoffercontroller/bounty.go @@ -336,9 +336,23 @@ func (c *Controller) reconcileBounty(ctx context.Context, key string) error { } } - // 7. Payout — Verified + a held escrow → capture to the fulfiller. + // 7. Payout — Verified + a held escrow → capture to the ACCEPTED FULFILLER. if bountyConditionIsTrue(status.Conditions, "Verified") && status.EscrowState == escrow.StateReserved { - receipt, err := c.escrowGateway().Capture(ctx, string(sb.UID)) + // Bind the capture to the fulfiller's exact (address, amount) seat so the + // facilitator settles only the signed payout to the signed recipient — + // never "all voucher seats" to whoever the poster pre-signed. A voucher + // missing the fulfiller's seat fails closed (no mispayment) rather than + // paying the wrong party. 
+ var receipt escrow.Receipt + var err error + if batch, ok := c.escrowGateway().(escrow.BatchGateway); ok && len(status.Claims) > 0 && status.Claims[0].FulfillerAddress != "" { + receipt, err = batch.CaptureBatch(ctx, string(sb.UID), []escrow.BatchRecipient{{ + Address: status.Claims[0].FulfillerAddress, + Amount: sb.Spec.Reward.Amount, + }}) + } else { + receipt, err = c.escrowGateway().Capture(ctx, string(sb.UID)) + } if err != nil { if isEscrowVoucherRefusal(err) { // The facilitator wants a (fresh) Permit2 voucher before it diff --git a/internal/serviceoffercontroller/bounty_escalation_test.go b/internal/serviceoffercontroller/bounty_escalation_test.go index 88f9f421..76cd605f 100644 --- a/internal/serviceoffercontroller/bounty_escalation_test.go +++ b/internal/serviceoffercontroller/bounty_escalation_test.go @@ -99,6 +99,17 @@ func (f *fakeEscrowGateway) lastReserve(t *testing.T, id string) escrow.ReserveR return reqs[len(reqs)-1] } +func (f *fakeEscrowGateway) lastBatch(t *testing.T, id string) []escrow.BatchRecipient { + t.Helper() + f.mu.Lock() + defer f.mu.Unlock() + b, ok := f.batches[id] + if !ok { + t.Fatalf("no CaptureBatch recorded for %s", id) + } + return b +} + // fakeValidationReader is the grounding chain fake. type fakeValidationReader struct { statuses map[common.Hash]erc8004.ValidationStatus diff --git a/internal/serviceoffercontroller/bounty_lifecycle_test.go b/internal/serviceoffercontroller/bounty_lifecycle_test.go index c6bda966..87c16cee 100644 --- a/internal/serviceoffercontroller/bounty_lifecycle_test.go +++ b/internal/serviceoffercontroller/bounty_lifecycle_test.go @@ -376,6 +376,13 @@ func TestBountyLifecycle_RewardVoucherFerry(t *testing.T) { if sb.Status.Phase != bountyPhasePaid { t.Fatalf("phase = %q, want Paid", sb.Status.Phase) } + // H2: the reward is captured to the ACCEPTED FULFILLER's explicit seat — a + // bound (address, amount) recipient — not "all voucher seats" to whoever + // the poster pre-signed. 
+ batch := fake.lastBatch(t, "uid-ferry") + if len(batch) != 1 || !strings.EqualFold(batch[0].Address, "0x2222222222222222222222222222222222222222") { + t.Fatalf("reward capture recipients = %+v, want a single seat bound to the fulfiller 0x2222…", batch) + } } func TestBountyLifecycle_BondAndEvalVoucherFerry(t *testing.T) { diff --git a/internal/serviceoffercontroller/render.go b/internal/serviceoffercontroller/render.go index af6ac4a3..72c467e2 100644 --- a/internal/serviceoffercontroller/render.go +++ b/internal/serviceoffercontroller/render.go @@ -1157,8 +1157,17 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) asset := offerAssetJSON(offer) if asset != nil { svc.Asset = asset - if raw != "" && asset.Decimals > 0 { - svc.PriceAtomicUnits = decimalToAtomicString(raw, int(asset.Decimals)) + // PriceAtomicUnits is what the buyer actually pays. For perMB that + // is the TOTAL (rate × dataset size), matching the x402 gate — the + // raw perMB stays the displayed per-megabyte RATE in PriceRaw. + charged := raw + if unit == "perMB" { + if total, terr := schemas.TotalPriceFromPerMB(raw, offer.Spec.Dataset.SizeBytes); terr == nil { + charged = total + } + } + if charged != "" && asset.Decimals > 0 { + svc.PriceAtomicUnits = decimalToAtomicString(charged, int(asset.Decimals)) } } @@ -1166,6 +1175,7 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) // on discovery, mirroring how Model is surfaced for inference/agent. 
if offer.IsDataset() { svc.DatasetManifestHash = offer.Spec.Dataset.ManifestHash + svc.DatasetFileHash = offer.Spec.Dataset.FileHash svc.DatasetVersion = offer.Spec.Dataset.Version svc.DatasetSizeBytes = offer.Spec.Dataset.SizeBytes } diff --git a/internal/serviceoffercontroller/render_test.go b/internal/serviceoffercontroller/render_test.go index 395200eb..e941f6f7 100644 --- a/internal/serviceoffercontroller/render_test.go +++ b/internal/serviceoffercontroller/render_test.go @@ -861,6 +861,7 @@ func TestBuildServiceCatalogJSON_DatasetOfferSurfacesVersion(t *testing.T) { Type: "dataset", Dataset: monetizeapi.ServiceOfferDataset{ ManifestHash: "abc123", + FileHash: "def456", Version: "2", SizeBytes: 1048576, }, @@ -895,6 +896,9 @@ func TestBuildServiceCatalogJSON_DatasetOfferSurfacesVersion(t *testing.T) { if svc.DatasetManifestHash != "abc123" { t.Errorf("datasetManifestHash = %q, want abc123", svc.DatasetManifestHash) } + if svc.DatasetFileHash != "def456" { + t.Errorf("datasetFileHash = %q, want def456 (content-integrity anchor must federate)", svc.DatasetFileHash) + } if svc.DatasetVersion != "2" { t.Errorf("datasetVersion = %q, want 2", svc.DatasetVersion) } diff --git a/internal/x402/escrow/server.go b/internal/x402/escrow/server.go index dc6b5488..5049a26e 100644 --- a/internal/x402/escrow/server.go +++ b/internal/x402/escrow/server.go @@ -187,6 +187,18 @@ func (s *Server) handleReserve(w http.ResponseWriter, r *http.Request) { writeJSON(w, entry.Receipt) return } + // A re-reserve of an already-held id must not silently REPLACE the signed + // settlement terms: an identical request is idempotent success, but a + // different voucher/amount under the same id is a conflict, not an + // overwrite (Reserve is specified idempotent). 
+ if exists && entry.State == StateReserved { + if sameReserveRequest(entry.Request, &req) { + writeJSON(w, entry.Receipt) + return + } + http.Error(w, "escrow id already reserved with different settlement terms", http.StatusConflict) + return + } var receipt Receipt if req.Voucher == nil { @@ -226,6 +238,19 @@ func (s *Server) handleReserve(w http.ResponseWriter, r *http.Request) { writeJSON(w, receipt) } +// sameReserveRequest reports whether two reserve requests carry identical +// settlement terms, so a retry is treated as idempotent success rather than a +// silent overwrite of the signed voucher. Compared by canonical JSON (struct +// fields marshal in declaration order, so the encoding is deterministic). +func sameReserveRequest(a, b *ReserveRequest) bool { + if a == nil || b == nil { + return a == b + } + ja, err1 := json.Marshal(a) + jb, err2 := json.Marshal(b) + return err1 == nil && err2 == nil && string(ja) == string(jb) +} + // captureRequest is the optional capture body. HTTPGateway.Capture sends no // body (capture every voucher seat); HTTPGateway.CaptureBatch sends // {"recipients":[...]} (capture the subset, omitted seats unpaid). diff --git a/internal/x402/escrow/server_test.go b/internal/x402/escrow/server_test.go index 80348366..61e79f8c 100644 --- a/internal/x402/escrow/server_test.go +++ b/internal/x402/escrow/server_test.go @@ -99,6 +99,34 @@ func TestServer_ReserveAwaitingThenReserved(t *testing.T) { } } +// A re-reserve of a held id with DIFFERENT terms must 409, not silently +// overwrite the signed voucher; an identical re-reserve stays idempotent. 
+func TestServer_ReserveConflictOnDifferentTerms(t *testing.T) { + sub := &fakeSubmitter{} + _, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base", "base-sepolia"}, Submitter: sub}) + ctx := context.Background() + + v := signedTestVoucher(t) + req := ReserveRequest{ID: "c1", Network: "base-sepolia", Voucher: &v} + if r, err := g.Reserve(ctx, req); err != nil || r.State != StateReserved { + t.Fatalf("initial reserve = %+v, %v; want Reserved", r, err) + } + // Identical re-reserve is idempotent success. + if r, err := g.Reserve(ctx, req); err != nil || r.State != StateReserved { + t.Fatalf("idempotent re-reserve = %+v, %v; want Reserved", r, err) + } + // A different (validly-signed) voucher under the same id is a conflict. + v2, key2 := goldenVoucher(t) + v2.Deadline = time.Now().Add(time.Hour).Unix() + v2.Nonce = "424242" + if err := SignVoucher(&v2, big.NewInt(84532), key2); err != nil { + t.Fatal(err) + } + if _, err := g.Reserve(ctx, ReserveRequest{ID: "c1", Network: "base-sepolia", Voucher: &v2}); err == nil || !strings.Contains(err.Error(), "409") { + t.Fatalf("conflicting re-reserve = %v, want 409", err) + } +} + func TestServer_ReserveRejectsBadVouchers(t *testing.T) { _, g, _ := newTestServer(t, ServerOptions{Token: "secret", Spender: testSpender, Networks: []string{"base-sepolia"}, Submitter: &fakeSubmitter{}}) ctx := context.Background() diff --git a/internal/x402/serviceoffer_source.go b/internal/x402/serviceoffer_source.go index 77a852f7..6c9925d7 100644 --- a/internal/x402/serviceoffer_source.go +++ b/internal/x402/serviceoffer_source.go @@ -250,7 +250,14 @@ func effectivePrice(offer *monetizeapi.ServiceOffer) (price, priceModel, perMTok case offer.Spec.Payment.Price.PerHour != "": return offer.Spec.Payment.Price.PerHour, "perHour", "", 0, nil case offer.Spec.Payment.Price.PerMB != "": - return offer.Spec.Payment.Price.PerMB, "perMB", "", 0, nil + // A dataset is bought once, so the enforced 
per-request price is the + // TOTAL: perMB × (sizeBytes / 2^20). Returning the raw perMB would charge + // a single megabyte's worth for the entire dataset. + total, err := schemas.TotalPriceFromPerMB(offer.Spec.Payment.Price.PerMB, offer.Spec.Dataset.SizeBytes) + if err != nil { + return "", "", "", 0, fmt.Errorf("invalid perMB price %q: %w", offer.Spec.Payment.Price.PerMB, err) + } + return total, "perMB", "", 0, nil default: return "0", "", "", 0, nil } diff --git a/internal/x402/serviceoffer_source_test.go b/internal/x402/serviceoffer_source_test.go index 4fef21c8..7033c42b 100644 --- a/internal/x402/serviceoffer_source_test.go +++ b/internal/x402/serviceoffer_source_test.go @@ -213,7 +213,7 @@ func TestRouteRuleFromOffer_DatasetAdvertisesDatasetMetadata(t *testing.T) { ManifestHash: "ABC123", Version: "2", FileHash: "DEF456", - SizeBytes: 1048576, + SizeBytes: 50 * 1048576, // 50 MiB }, Payment: monetizeapi.ServiceOfferPayment{ Network: "base-sepolia", @@ -239,11 +239,13 @@ func TestRouteRuleFromOffer_DatasetAdvertisesDatasetMetadata(t *testing.T) { if route.DatasetFileHash != "def456" { t.Errorf("DatasetFileHash = %q, want def456 (lowercased)", route.DatasetFileHash) } - if route.DatasetSizeBytes != 1048576 { - t.Errorf("DatasetSizeBytes = %d, want 1048576", route.DatasetSizeBytes) + if route.DatasetSizeBytes != 50*1048576 { + t.Errorf("DatasetSizeBytes = %d, want %d", route.DatasetSizeBytes, 50*1048576) } - if route.Price != "0.01" { - t.Errorf("Price = %q, want 0.01 (from perMB)", route.Price) + // perMB is the TOTAL for the whole artifact: 0.01/MB × 50 MiB = 0.5, + // NOT the raw 0.01 (the H3 bug charged one megabyte for the whole dataset).
+ if route.Price != "0.5" { + t.Errorf("Price = %q, want 0.5 (0.01/MB × 50 MiB total)", route.Price) } if route.Pattern != "/services/pi-sessions/*" { t.Errorf("Pattern = %q, want /services/pi-sessions/*", route.Pattern) From f9175dfad0c633a86443c25f96a6307f3f983874 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:36:16 +0400 Subject: [PATCH 04/11] security(x402): gate payment proofs on secure transport, opt-in via --secure Direct peer-to-peer inference accepts plaintext by default; 'obol sell inference --secure' sets RequireSecurePayment and the always-behind-TLS cluster verifier enforces the gate. (Squashes the original always-on gate with its reversal to opt-in.) --- cmd/obol/sell.go | 78 +++++++++++++++++-------------- internal/inference/gateway.go | 10 ++++ internal/inference/store.go | 5 ++ internal/x402/forwardauth.go | 44 +++++++++++++++++ internal/x402/forwardauth_test.go | 59 +++++++++++++++++++++++ internal/x402/verifier.go | 22 +++++---- internal/x402/verifier_test.go | 13 ++++++ 7 files changed, 187 insertions(+), 44 deletions(-) diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index fed6d1e0..6e2b661f 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -210,6 +210,10 @@ Examples: Usage: "Upstream Ollama URL", Value: "http://localhost:11434", }, + &cli.BoolFlag{ + Name: "secure", + Usage: "Require payment proofs over a secure transport (TLS / tunnel / loopback). 
Default allows direct plaintext peer-to-peer; the router-mediated secure path uses this.", + }, &cli.StringFlag{ Name: "enclave-tag", Aliases: []string{"e"}, @@ -437,26 +441,27 @@ Examples: } d := &inference.Deployment{ - Name: name, - EnclaveTag: cmd.String("enclave-tag"), - ListenAddr: cmd.String("listen"), - UpstreamURL: upstreamFlag, - WalletAddress: wallet, - PricePerRequest: perRequest, - PricePerMTok: priceTable.PerMTok, - AssetSymbol: assetSymbol, - Chain: chainName, - FacilitatorURL: cmd.String("facilitator"), - VMMode: cmd.Bool("vm"), - VMImage: cmd.String("vm-image"), - VMCPUs: cmd.Int("vm-cpus"), - VMMemoryMB: cmd.Int("vm-memory"), - VMHostPort: cmd.Int("vm-host-port"), - TEEType: teeType, - ModelHash: modelHash, - ModelName: modelFlag, - ServiceNamespace: "llm", - Registration: persistedRegistration, + Name: name, + EnclaveTag: cmd.String("enclave-tag"), + ListenAddr: cmd.String("listen"), + UpstreamURL: upstreamFlag, + WalletAddress: wallet, + PricePerRequest: perRequest, + PricePerMTok: priceTable.PerMTok, + AssetSymbol: assetSymbol, + Chain: chainName, + FacilitatorURL: cmd.String("facilitator"), + VMMode: cmd.Bool("vm"), + VMImage: cmd.String("vm-image"), + VMCPUs: cmd.Int("vm-cpus"), + VMMemoryMB: cmd.Int("vm-memory"), + VMHostPort: cmd.Int("vm-host-port"), + TEEType: teeType, + ModelHash: modelHash, + ModelName: modelFlag, + ServiceNamespace: "llm", + Registration: persistedRegistration, + RequireSecurePayment: cmd.Bool("secure"), } if pf := cmd.String("provenance-file"); pf != "" { @@ -3474,22 +3479,23 @@ func sleepWithContext(ctx context.Context, d time.Duration) bool { // runInferenceGateway starts the x402 inference gateway and blocks until shutdown. 
func runInferenceGateway(u *ui.UI, d *inference.Deployment, chain x402verifier.ChainInfo) error { gw, err := inference.NewGateway(inference.GatewayConfig{ - ListenAddr: d.ListenAddr, - UpstreamURL: d.UpstreamURL, - WalletAddress: d.WalletAddress, - PricePerRequest: d.PricePerRequest, - AssetSymbol: d.AssetSymbol, - Chain: chain, - FacilitatorURL: d.FacilitatorURL, - EnclaveTag: d.EnclaveTag, - VMMode: d.VMMode, - VMImage: d.VMImage, - VMCPUs: d.VMCPUs, - VMMemoryMB: d.VMMemoryMB, - VMHostPort: d.VMHostPort, - TEEType: d.TEEType, - ModelHash: d.ModelHash, - NoPaymentGate: d.NoPaymentGate, + ListenAddr: d.ListenAddr, + UpstreamURL: d.UpstreamURL, + WalletAddress: d.WalletAddress, + PricePerRequest: d.PricePerRequest, + AssetSymbol: d.AssetSymbol, + Chain: chain, + FacilitatorURL: d.FacilitatorURL, + EnclaveTag: d.EnclaveTag, + VMMode: d.VMMode, + VMImage: d.VMImage, + VMCPUs: d.VMCPUs, + VMMemoryMB: d.VMMemoryMB, + VMHostPort: d.VMHostPort, + TEEType: d.TEEType, + ModelHash: d.ModelHash, + NoPaymentGate: d.NoPaymentGate, + RequireSecurePayment: d.RequireSecurePayment, }) if err != nil { return fmt.Errorf("failed to create gateway: %w", err) diff --git a/internal/inference/gateway.go b/internal/inference/gateway.go index 9bebc42b..bcd63d3d 100644 --- a/internal/inference/gateway.go +++ b/internal/inference/gateway.go @@ -106,6 +106,13 @@ type GatewayConfig struct { // ForwardAuth) to avoid double-gating requests. Enclave/TEE encryption // middleware remains active when enabled. NoPaymentGate bool + + // RequireSecurePayment, when true, rejects payment proofs that did not + // arrive over a secure transport (TLS / X-Forwarded-Proto=https / loopback + // / private IP). Default false: a direct, un-tunneled peer-to-peer buyer + // over plaintext HTTP is accepted. `obol sell inference --secure` sets it + // (the router-mediated secure posture). 
+ RequireSecurePayment bool } // Gateway is an x402-enabled reverse proxy for LLM inference with optional @@ -190,6 +197,9 @@ func (g *Gateway) buildHandler(upstreamURL string) (http.Handler, error) { // so a configured VerifyOnly=false is correct by design — suppress the // misleading per-request warning on this path. SettlesInProcess: true, + // Direct peer-to-peer is insecure by default so an un-tunneled buyer + // works out of the box; --secure flips this on. + RequireSecureTransport: g.config.RequireSecurePayment, }, []x402types.PaymentRequirements{requirement}) // Initialise key backend: TEE (Linux) or SE (macOS), mutually exclusive. diff --git a/internal/inference/store.go b/internal/inference/store.go index 5e0d99a1..70dd8981 100644 --- a/internal/inference/store.go +++ b/internal/inference/store.go @@ -97,6 +97,11 @@ type Deployment struct { // gateway runs behind the cluster's x402 verifier to avoid double-gating. NoPaymentGate bool `json:"no_payment_gate,omitempty"` + // RequireSecurePayment, when true (`obol sell inference --secure`), rejects + // payment proofs not sent over a secure transport. Default false: direct, + // un-tunneled peer-to-peer buyers over plaintext HTTP are accepted. + RequireSecurePayment bool `json:"require_secure_payment,omitempty"` + // Provenance holds optional metadata about how the model was produced // (e.g. autoresearch experiment results). Stored alongside the deployment // config and passed to the registration document when selling. 
diff --git a/internal/x402/forwardauth.go b/internal/x402/forwardauth.go index c465326f..0cfc3295 100644 --- a/internal/x402/forwardauth.go +++ b/internal/x402/forwardauth.go @@ -11,11 +11,36 @@ import ( "log" "net" "net/http" + "strings" "time" x402types "github.com/x402-foundation/x402/go/types" ) +// requestIsSecure reports whether an X-PAYMENT-bearing request reached us over a +// transport on which the proof cannot be trivially sniffed or replayed: direct +// TLS, a TLS-terminating proxy/tunnel (X-Forwarded-Proto=https), or a +// loopback/cluster-internal hop (cloudflared→127.0.0.1, Traefik→pod IP). A +// payment submitted directly over plaintext HTTP to a publicly-bound gateway +// returns false. +func requestIsSecure(r *http.Request) bool { + if r.TLS != nil { + return true + } + if strings.EqualFold(r.Header.Get("X-Forwarded-Proto"), "https") { + return true + } + host := r.RemoteAddr + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if ip := net.ParseIP(host); ip != nil { + return ip.IsLoopback() || ip.IsPrivate() + } + // A non-IP remote (unix socket, in-process httptest) is treated as local. + return host == "" || strings.EqualFold(host, "localhost") +} + // ForwardAuthConfig configures the ForwardAuth x402 middleware. type ForwardAuthConfig struct { // FacilitatorURL is the x402 facilitator service URL (e.g., "https://x402.org/facilitator"). @@ -56,6 +81,15 @@ type ForwardAuthConfig struct { // change settlement behaviour; the genuinely-dangerous Traefik ForwardAuth // path leaves this false and still warns if an operator flips VerifyOnly. SettlesInProcess bool + + // RequireSecureTransport rejects X-PAYMENT proofs that did not arrive over a + // secure transport (TLS / X-Forwarded-Proto=https / loopback / private IP). + // It is OPT-IN: the default (false) accepts payment over any transport so + // direct, un-tunneled peer-to-peer inference works out of the box. 
The + // cluster verifier (always behind Traefik, which terminates TLS) sets this + // true for free defense-in-depth; the standalone seller turns it on with + // `obol sell inference --secure` (the router-mediated secure posture). + RequireSecureTransport bool } // facilitatorVerifyRequest is the JSON body sent to POST /verify and /settle. @@ -127,6 +161,16 @@ func NewForwardAuthMiddleware(cfg ForwardAuthConfig, requirements []x402types.Pa return } + // Opt-in only: when RequireSecureTransport is set, a payment proof + // must arrive over a secure transport (TLS / X-Forwarded-Proto=https + // / loopback / private IP) — a plaintext proof can be sniffed and + // replayed. The default accepts any transport so direct, un-tunneled + // peer-to-peer inference works out of the box. + if cfg.RequireSecureTransport && !requestIsSecure(r) { + http.Error(w, "x402: payment proof must be sent over a secure transport (HTTPS)", http.StatusBadRequest) + return + } + // Decode the base64-encoded payment payload. 
payloadBytes, err := base64.StdEncoding.DecodeString(paymentHeader) if err != nil { diff --git a/internal/x402/forwardauth_test.go b/internal/x402/forwardauth_test.go index afc2d5f3..dbc7b473 100644 --- a/internal/x402/forwardauth_test.go +++ b/internal/x402/forwardauth_test.go @@ -2,6 +2,7 @@ package x402 import ( "bytes" + "crypto/tls" "encoding/base64" "encoding/json" "io" @@ -606,3 +607,61 @@ func TestForwardAuth_SettlesInProcess_SuppressesWarning(t *testing.T) { t.Fatalf("SettlesInProcess=true must suppress the verifyOnly=false warning, got:\n%s", gotLog) } } + +func TestRequestIsSecure(t *testing.T) { + mk := func(tlsOn bool, xfp, remote string) *http.Request { + r := httptest.NewRequest(http.MethodPost, "/", nil) + r.RemoteAddr = remote + if xfp != "" { + r.Header.Set("X-Forwarded-Proto", xfp) + } + if tlsOn { + r.TLS = &tls.ConnectionState{} + } + return r + } + cases := []struct { + name string + req *http.Request + want bool + }{ + {"direct TLS", mk(true, "", "1.2.3.4:443"), true}, + {"x-forwarded-proto https", mk(false, "https", "203.0.113.7:80"), true}, + {"loopback hop", mk(false, "", "127.0.0.1:8402"), true}, + {"cluster-internal hop", mk(false, "", "10.42.0.5:80"), true}, + {"plaintext public", mk(false, "", "203.0.113.7:80"), false}, + {"forwarded-proto http public", mk(false, "http", "203.0.113.7:80"), false}, + } + for _, c := range cases { + if got := requestIsSecure(c.req); got != c.want { + t.Errorf("%s: requestIsSecure = %v, want %v", c.name, got, c.want) + } + } +} + +// A payment proof submitted over plaintext HTTP directly to a publicly-bound +// gateway must be rejected before any facilitator call (defense-in-depth). +func TestForwardAuth_SecureMode_RejectsPlaintextPayment(t *testing.T) { + // Opt-in: with RequireSecureTransport a plaintext-public payment is rejected + // before any facilitator call. 
The DEFAULT (no RequireSecureTransport) + // accepts it — the happy-path tests above all run over httptest's 192.0.2.1 + // plaintext remote with default config and succeed, which is the insecure + // direct peer-to-peer default. + req := BuildV2Requirement(ChainBaseSepolia, "0.01", "0x1111111111111111111111111111111111111111", 0) + mw := NewForwardAuthMiddleware(ForwardAuthConfig{ + FacilitatorURL: "https://x402.gcp.obol.tech", + VerifyOnly: true, + RequireSecureTransport: true, + }, []x402types.PaymentRequirements{req}) + h := mw(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) + + r := httptest.NewRequest(http.MethodPost, "http://gw.example/", nil) + r.RemoteAddr = "203.0.113.7:80" // public, plaintext, no X-Forwarded-Proto + r.Header.Set("X-PAYMENT", base64.StdEncoding.EncodeToString([]byte(`{"x402Version":1}`))) + w := httptest.NewRecorder() + h.ServeHTTP(w, r) + + if w.Code != http.StatusBadRequest || !strings.Contains(w.Body.String(), "secure transport") { + t.Fatalf("plaintext payment = %d %q, want 400 secure-transport rejection", w.Code, strings.TrimSpace(w.Body.String())) + } +} diff --git a/internal/x402/verifier.go b/internal/x402/verifier.go index 48adb307..22f285f1 100644 --- a/internal/x402/verifier.go +++ b/internal/x402/verifier.go @@ -183,10 +183,14 @@ func (v *Verifier) HandleVerify(w http.ResponseWriter, r *http.Request) { display := buildPaymentDisplay(rule, chain, asset, wallet, requirement.Amount) middleware := NewForwardAuthMiddleware(ForwardAuthConfig{ - FacilitatorURL: cfg.FacilitatorURL, - VerifyOnly: cfg.VerifyOnly, - Extensions: extensions, - SendPaymentRequired: NewHTMLAwarePaymentRequired(display), + FacilitatorURL: cfg.FacilitatorURL, + VerifyOnly: cfg.VerifyOnly, + Extensions: extensions, + // The cluster verifier always sits behind Traefik (which terminates TLS + // and sets X-Forwarded-Proto=https), so enforcing secure transport here + // is free defense-in-depth — direct P2P sellers 
leave it off. + RequireSecureTransport: true, + SendPaymentRequired: NewHTMLAwarePaymentRequired(display), }, []x402types.PaymentRequirements{requirement}) upstreamAuth := rule.UpstreamAuth @@ -253,10 +257,12 @@ func (v *Verifier) HandleProxy(w http.ResponseWriter, r *http.Request) { // upstream and settles only after a <400 response, so verifyOnly=false // is correct here. SettlesInProcess suppresses the (otherwise // per-request) verifyOnly=false warning on this safe path. - VerifyOnly: false, - SettlesInProcess: true, - Extensions: extensions, - SendPaymentRequired: NewHTMLAwarePaymentRequired(display), + VerifyOnly: false, + SettlesInProcess: true, + // Behind Traefik (TLS-terminated, X-Forwarded-Proto=https) → free. + RequireSecureTransport: true, + Extensions: extensions, + SendPaymentRequired: NewHTMLAwarePaymentRequired(display), }, []x402types.PaymentRequirements{requirement}) hadPayment := r.Header.Get("X-PAYMENT") != "" diff --git a/internal/x402/verifier_test.go b/internal/x402/verifier_test.go index a20ae64f..df326969 100644 --- a/internal/x402/verifier_test.go +++ b/internal/x402/verifier_test.go @@ -190,6 +190,7 @@ func TestVerifier_PaidRoute_ValidPayment_Returns200(t *testing.T) { req.Header.Set("X-Forwarded-Uri", "/rpc/mainnet") req.Header.Set("X-Forwarded-Host", "obol.stack") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleVerify(w, req) @@ -214,6 +215,7 @@ func TestVerifier_PaidRoute_RejectedPayment_Returns402(t *testing.T) { req.Header.Set("X-Forwarded-Uri", "/rpc/mainnet") req.Header.Set("X-Forwarded-Host", "obol.stack") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleVerify(w, req) @@ -242,6 +244,7 @@ func TestVerifier_VerifyOnly_SkipsSettle(t *testing.T) { req.Header.Set("X-Forwarded-Uri", "/rpc/mainnet") req.Header.Set("X-Forwarded-Host", "obol.stack") 
req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleVerify(w, req) @@ -383,6 +386,7 @@ func TestVerifier_HandleProxy_ValidPayment_SettlesAndStripsPrefix(t *testing.T) req := httptest.NewRequest(http.MethodPost, "/services/demo/v1/chat/completions", strings.NewReader(`{"model":"qwen3.5:9b"}`)) req.Header.Set("Content-Type", "application/json") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleProxy(w, req) @@ -426,6 +430,7 @@ func TestVerifier_HandleProxy_UpstreamFailure_DoesNotSettle(t *testing.T) { req := httptest.NewRequest(http.MethodPost, "/services/demo/v1/chat/completions", strings.NewReader(`{}`)) req.Header.Set("Content-Type", "application/json") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleProxy(w, req) @@ -510,6 +515,7 @@ func TestVerifier_HandleProxy_StreamsSSEChunks(t *testing.T) { } req.Header.Set("Content-Type", "application/json") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") client := &http.Client{Timeout: 15 * time.Second} start := time.Now() @@ -617,6 +623,7 @@ func TestVerifier_HandleProxy_NonStreamingResponse(t *testing.T) { } req.Header.Set("Content-Type", "application/json") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") resp, err := (&http.Client{Timeout: 5 * time.Second}).Do(req) if err != nil { @@ -866,6 +873,7 @@ func TestVerifier_PerRoutePayTo_WithValidPayment(t *testing.T) { req.Header.Set("X-Forwarded-Uri", "/services/test/foo") req.Header.Set("X-Forwarded-Host", "obol.stack") req.Header.Set("X-PAYMENT", testPaymentHeaderFor(t, routeWallet, "1000")) + req.Header.Set("X-Forwarded-Proto", "https") w := httptest.NewRecorder() v.HandleVerify(w, req) @@ -1040,6 +1048,7 @@ func 
TestVerifier_MetricsVerifiedAndRejectedPayments(t *testing.T) { okReq.Header.Set("X-Forwarded-Uri", "/rpc/mainnet") okReq.Header.Set("X-Forwarded-Host", "obol.stack") okReq.Header.Set("X-PAYMENT", testPaymentHeader(t)) + okReq.Header.Set("X-Forwarded-Proto", "https") okResp := httptest.NewRecorder() okVerifier.HandleVerify(okResp, okReq) if okResp.Code != http.StatusOK { @@ -1065,6 +1074,7 @@ func TestVerifier_MetricsVerifiedAndRejectedPayments(t *testing.T) { rejectReq.Header.Set("X-Forwarded-Uri", "/rpc/mainnet") rejectReq.Header.Set("X-Forwarded-Host", "obol.stack") rejectReq.Header.Set("X-PAYMENT", testPaymentHeader(t)) + rejectReq.Header.Set("X-Forwarded-Proto", "https") rejectResp := httptest.NewRecorder() rejectVerifier.HandleVerify(rejectResp, rejectReq) if rejectResp.Code != http.StatusPaymentRequired { @@ -1141,6 +1151,7 @@ func TestVerifier_LastPaymentSuccessGauge(t *testing.T) { req.Header.Set("X-Forwarded-Host", "obol.stack") if tt.setPayment { req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") } before := time.Now().Unix() @@ -1203,6 +1214,7 @@ func TestVerifier_Reload_PrunesDeletedOfferSeries(t *testing.T) { req.Header.Set("X-Forwarded-Uri", path) req.Header.Set("X-Forwarded-Host", "obol.stack") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") rec := httptest.NewRecorder() v.HandleVerify(rec, req) if rec.Code != http.StatusOK { @@ -1502,6 +1514,7 @@ func TestVerifier_PruneSeriesNotIn_DistinguishesAssetSymbol(t *testing.T) { req.Header.Set("X-Forwarded-Uri", path) req.Header.Set("X-Forwarded-Host", "obol.stack") req.Header.Set("X-PAYMENT", testPaymentHeader(t)) + req.Header.Set("X-Forwarded-Proto", "https") rec := httptest.NewRecorder() v.HandleVerify(rec, req) if rec.Code != http.StatusOK { From d52747af4c5c65f6e62c089c00ec6f0b0a3d5c70 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Mon, 15 Jun 2026 22:35:27 +0400 
Subject: [PATCH 05/11] feat(dataset): buy --join pays x402 to mint a member token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a host-side x402 signer (SignExactPayment, EIP-3009 TransferWithAuthorization) and dataset.JoinPaid so 'obol buy dataset --join' probes the seller's 402 paid-join challenge, signs the payment locally, and mints a version-scoped member token — fully peer-to-peer, no cluster, sidecar, or remote signer. --max-price caps the price before signing; the buyer wallet auto-creates at /dataset-serve/buyer.key. Note: one cosmetic 'approve' help-string rename rides along here (it shares a print block with the --join output). --- cmd/obol/dataset.go | 54 ++++++++++- internal/dataset/client.go | 110 +++++++++++++++++++++++ internal/dataset/client_test.go | 75 ++++++++++++++++ internal/x402/clientsign.go | 149 +++++++++++++++++++++++++++++++ internal/x402/clientsign_test.go | 107 ++++++++++++++++++++++ 5 files changed, 491 insertions(+), 4 deletions(-) create mode 100644 internal/x402/clientsign.go create mode 100644 internal/x402/clientsign_test.go diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go index f391530d..5957f96b 100644 --- a/cmd/obol/dataset.go +++ b/cmd/obol/dataset.go @@ -265,9 +265,14 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { u.Infof("Membership: %s", cmd.String("membership")) u.Blank() u.Bold("Buyers fetch with:") - u.Printf(" obol buy dataset %s --id %s --member-token --owner %s", publicURL, id, signer.SignerID()) + if paidJoin != nil { + u.Printf(" obol buy dataset %s --id %s --join --owner %s", publicURL, id, signer.SignerID()) + u.Dim(" (--join pays the x402 price host-side and mints a member token — no cluster needed)") + } else { + u.Printf(" obol buy dataset %s --id %s --member-token --owner %s", publicURL, id, signer.SignerID()) + } if cmd.String("membership") == dataset.MembershipInvite { - u.Dim("Admit a worker's printed code: obol dataset approve ") + 
u.Dim("Admit a worker's printed code: obol sell data approve ") } u.Dim("Ctrl-C to stop.") @@ -386,9 +391,12 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { Flags: []cli.Flag{ &cli.StringFlag{Name: "id", Usage: "Dataset id (or embed /dataset/ in the URL)"}, &cli.IntFlag{Name: "version", Usage: "Version to fetch (0 = head)"}, - &cli.StringFlag{Name: "member-token", Usage: "Member token (owner-issued or payment-minted)", Required: true}, + &cli.StringFlag{Name: "member-token", Usage: "Member token (owner-issued or payment-minted). Omit when using --join."}, &cli.StringFlag{Name: "out", Usage: "Output file (default -v.jsonl)"}, &cli.StringFlag{Name: "owner", Usage: "Expected owner 0x address that must have signed the version log (pins identity; recommended)"}, + &cli.BoolFlag{Name: "join", Usage: "Pay the seller's x402 join price to mint a member token (host-side, peer-to-peer; no cluster needed)"}, + &cli.StringFlag{Name: "key", Usage: "Buyer wallet keyfile (default /dataset-serve/buyer.key, auto-created — fund the printed address)"}, + &cli.StringFlag{Name: "max-price", Usage: "Safety cap in atomic units on the --join price"}, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -399,6 +407,38 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { if id == "" { return fmt.Errorf("dataset id required (pass --id or a /dataset/ URL)") } + + // Resolve the member token: either supplied, or minted by paying the + // seller's x402 join price host-side (peer-to-peer, no cluster). 
+ token := strings.TrimSpace(cmd.String("member-token")) + if cmd.Bool("join") { + if token != "" { + return fmt.Errorf("pass either --member-token or --join, not both") + } + keyPath := strings.TrimSpace(cmd.String("key")) + if keyPath == "" { + keyPath = datasetBuyerKeyPath(cfg) + } + key, kerr := dataset.LoadOrCreateKey(keyPath) + if kerr != nil { + return kerr + } + u.Infof("Buyer wallet: %s (fund this address to pay)", dataset.NewEthSigner(key).SignerID()) + jr, jerr := dataset.JoinPaid(ctx, dataset.JoinOptions{ + BaseURL: base, ID: id, Version: cmd.Int("version"), + MaxAtomic: strings.TrimSpace(cmd.String("max-price")), + }, func(pr x402types.PaymentRequirements) (string, error) { + return x402.SignExactPayment(key, pr) + }) + if jerr != nil { + return jerr + } + token = jr.Token + u.Successf("Paid join: %s atomic units to %s on %s → minted v%d member token", jr.Amount, jr.PayTo, jr.Network, jr.Version) + } else if token == "" { + return fmt.Errorf("provide --member-token , or --join to pay for one") + } + out := cmd.String("out") if out == "" { v := cmd.Int("version") @@ -410,7 +450,7 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { u.Infof("Fetching %s (version %v) → %s", id, orHead(cmd.Int("version")), out) res, err := dataset.Fetch(ctx, dataset.FetchOptions{ BaseURL: base, ID: id, Version: cmd.Int("version"), - Token: cmd.String("member-token"), OutPath: out, + Token: token, OutPath: out, ExpectedOwner: strings.TrimSpace(cmd.String("owner")), }) if err != nil { @@ -434,6 +474,12 @@ func datasetKeyPath(cfg *config.Config, id string) string { return filepath.Join(datasetServeDir(cfg), id+".key") } +// datasetBuyerKeyPath is the host buyer's wallet keyfile used by `buy dataset +// --join` to pay for member tokens. Auto-created on first use. 
+func datasetBuyerKeyPath(cfg *config.Config) string { + return filepath.Join(datasetServeDir(cfg), "buyer.key") +} + func datasetStorePath(cfg *config.Config, id string) string { return filepath.Join(datasetServeDir(cfg), id+".store.json") } diff --git a/internal/dataset/client.go b/internal/dataset/client.go index 08c2c21e..8799d56f 100644 --- a/internal/dataset/client.go +++ b/internal/dataset/client.go @@ -5,12 +5,122 @@ import ( "encoding/json" "fmt" "io" + "math/big" "net/http" "os" "strconv" "strings" + + x402types "github.com/x402-foundation/x402/go/types" ) +// SignPaymentFunc signs an x402 payment for a requirement and returns the +// base64 X-PAYMENT header value. Injected so the dataset client stays decoupled +// from the concrete signer (the CLI passes x402.SignExactPayment). +type SignPaymentFunc func(req x402types.PaymentRequirements) (string, error) + +// JoinOptions configures a paid join (pay the seller's x402 price to mint a +// version-scoped member token). +type JoinOptions struct { + BaseURL string + ID string + Version int // 0 = head + MaxAtomic string // optional safety cap on the join price, in atomic units + Client *http.Client +} + +// JoinResult reports a completed paid join. +type JoinResult struct { + Token string + Version int + Amount string // atomic units paid + PayTo string + Network string +} + +// JoinPaid pays the seller's x402 join price to mint a version-scoped member +// token: it probes the /join/paid 402 challenge, signs the advertised payment +// with sign, and POSTs it. Fully host-side and peer-to-peer — no cluster, +// sidecar, or remote signer needed. +func JoinPaid(ctx context.Context, opts JoinOptions, sign SignPaymentFunc) (JoinResult, error) { + if opts.Client == nil { + opts.Client = http.DefaultClient + } + url := strings.TrimSuffix(opts.BaseURL, "/") + "/dataset/" + opts.ID + "/join/paid" + if opts.Version > 0 { + url += "?version=" + strconv.Itoa(opts.Version) + } + + // 1. Probe for the 402 challenge. 
+ probe, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil) + if err != nil { + return JoinResult{}, err + } + resp, err := opts.Client.Do(probe) + if err != nil { + return JoinResult{}, err + } + pr, err := decodeJoinChallenge(resp) + if err != nil { + return JoinResult{}, err + } + if opts.MaxAtomic != "" { + limit, ok1 := new(big.Int).SetString(opts.MaxAtomic, 10) + price, ok2 := new(big.Int).SetString(pr.Amount, 10) + if !ok1 || !ok2 || price.Cmp(limit) > 0 { // fail closed: an unparsable cap or price must never bypass the safety check + return JoinResult{}, fmt.Errorf("dataset: join price %q exceeds or cannot be checked against --max-price %q (atomic units)", pr.Amount, opts.MaxAtomic) + } + } + + // 2. Sign the advertised payment, then 3. POST it to mint the token. + xpay, err := sign(pr) + if err != nil { + return JoinResult{}, fmt.Errorf("dataset: sign join payment: %w", err) + } + payReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, nil) + if err != nil { + return JoinResult{}, err + } + payReq.Header.Set("X-PAYMENT", xpay) + payResp, err := opts.Client.Do(payReq) + if err != nil { + return JoinResult{}, err + } + defer payResp.Body.Close() + body, _ := io.ReadAll(io.LimitReader(payResp.Body, 1<<16)) + if payResp.StatusCode != http.StatusOK { + return JoinResult{}, fmt.Errorf("dataset: paid join %s -> %d: %s", url, payResp.StatusCode, strings.TrimSpace(string(body))) + } + var out struct { + Token string `json:"token"` + Version int `json:"version"` + } + if err := json.Unmarshal(body, &out); err != nil || out.Token == "" { + return JoinResult{}, fmt.Errorf("dataset: paid join returned no token: %s", strings.TrimSpace(string(body))) + } + return JoinResult{Token: out.Token, Version: out.Version, Amount: pr.Amount, PayTo: pr.PayTo, Network: pr.Network}, nil +} + +// decodeJoinChallenge reads the seller's 402 paid-join challenge and returns +// the first advertised payment requirement. 
+func decodeJoinChallenge(resp *http.Response) (x402types.PaymentRequirements, error) { + defer resp.Body.Close() + if resp.StatusCode != http.StatusPaymentRequired { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) + return x402types.PaymentRequirements{}, fmt.Errorf("dataset: expected a 402 paid-join challenge, got %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + var challenge struct { + Accepts []x402types.PaymentRequirements `json:"accepts"` + } + if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<16)).Decode(&challenge); err != nil { + return x402types.PaymentRequirements{}, fmt.Errorf("dataset: decode 402 challenge: %w", err) + } + if len(challenge.Accepts) == 0 { + return x402types.PaymentRequirements{}, fmt.Errorf("dataset: 402 challenge carried no accepts[]") + } + return challenge.Accepts[0], nil +} + // FetchResult reports what a verified download produced. type FetchResult struct { Version int diff --git a/internal/dataset/client_test.go b/internal/dataset/client_test.go index d73ca864..ce4a3fb5 100644 --- a/internal/dataset/client_test.go +++ b/internal/dataset/client_test.go @@ -3,12 +3,87 @@ package dataset import ( "bytes" "context" + "encoding/json" + "net/http" "net/http/httptest" "os" "path/filepath" "testing" + + x402types "github.com/x402-foundation/x402/go/types" ) +func TestJoinPaid_ProbesSignsAndMints(t *testing.T) { + var sawPayment string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/dataset/ds/join/paid" { + http.Error(w, "not found", http.StatusNotFound) + return + } + if r.Header.Get("X-PAYMENT") == "" { + // 402 challenge; accepts marshalled from the real struct so the wire + // tags are exactly what JoinPaid decodes back. 
+ acc := x402types.PaymentRequirements{ + Scheme: "exact", Network: "eip155:84532", Amount: "1000", + Asset: "0xabc", PayTo: "0xdef", + Extra: map[string]any{"name": "USDC", "version": "2"}, + } + body, _ := json.Marshal(map[string]any{"x402Version": 2, "accepts": []x402types.PaymentRequirements{acc}}) + w.WriteHeader(http.StatusPaymentRequired) + _, _ = w.Write(body) + return + } + sawPayment = r.Header.Get("X-PAYMENT") + _, _ = w.Write([]byte(`{"token":"minted-token-xyz","version":3}`)) + })) + defer srv.Close() + + var signedFor x402types.PaymentRequirements + sign := func(req x402types.PaymentRequirements) (string, error) { + signedFor = req + return "BASE64XPAYMENT", nil + } + + jr, err := JoinPaid(context.Background(), JoinOptions{BaseURL: srv.URL, ID: "ds", Version: 3}, sign) + if err != nil { + t.Fatalf("JoinPaid: %v", err) + } + if jr.Token != "minted-token-xyz" || jr.Version != 3 { + t.Fatalf("result = %+v, want minted-token-xyz / v3", jr) + } + if jr.Amount != "1000" || jr.PayTo != "0xdef" { + t.Fatalf("result terms = %+v, want amount 1000 payTo 0xdef from the 402", jr) + } + if signedFor.PayTo != "0xdef" || signedFor.Amount != "1000" { + t.Fatalf("signer got %+v, want the 402's terms", signedFor) + } + if sawPayment != "BASE64XPAYMENT" { + t.Fatalf("server saw X-PAYMENT %q, want the signed header", sawPayment) + } +} + +func TestJoinPaid_RejectsOverMaxPrice(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + acc := x402types.PaymentRequirements{ + Scheme: "exact", Network: "eip155:84532", Amount: "5000", + Asset: "0xabc", PayTo: "0xdef", Extra: map[string]any{"name": "USDC", "version": "2"}, + } + body, _ := json.Marshal(map[string]any{"x402Version": 2, "accepts": []x402types.PaymentRequirements{acc}}) + w.WriteHeader(http.StatusPaymentRequired) + _, _ = w.Write(body) + })) + defer srv.Close() + + signed := false + sign := func(x402types.PaymentRequirements) (string, error) { signed = true; 
return "x", nil } + if _, err := JoinPaid(context.Background(), JoinOptions{BaseURL: srv.URL, ID: "ds", MaxAtomic: "1000"}, sign); err == nil { + t.Fatal("JoinPaid accepted a price above --max-price") + } + if signed { + t.Fatal("must reject before signing/paying when the price exceeds the cap") + } +} + func TestFetch_DownloadsAndVerifies(t *testing.T) { ts := newTestServer(t, MembershipOpen, passGate) httpSrv := httptest.NewServer(ts.srv.Handler()) diff --git a/internal/x402/clientsign.go b/internal/x402/clientsign.go new file mode 100644 index 00000000..120aefc5 --- /dev/null +++ b/internal/x402/clientsign.go @@ -0,0 +1,149 @@ +package x402 + +import ( + "crypto/ecdsa" + "crypto/rand" + "encoding/base64" + "encoding/json" + "fmt" + "strconv" + "strings" + "time" + + gethmath "github.com/ethereum/go-ethereum/common/math" + ethcrypto "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/signer/core/apitypes" + x402types "github.com/x402-foundation/x402/go/types" +) + +// pastValidityBuffer backdates an EIP-3009 authorization's validAfter so it is +// not rejected as "not yet valid" when the verifying chain's block.timestamp +// lags wall-clock (the classic anvil-fork skew). Matches obol's buy.py. +const pastValidityBuffer = 300 * time.Second + +// SignExactPayment signs an x402 "exact" (EIP-3009 TransferWithAuthorization) +// payment for req with key and returns the base64 X-PAYMENT header value. It is +// fully host-side — no cluster, sidecar, or remote signer — so a standalone +// buyer can pay a standalone seller directly, peer-to-peer. +// +// Everything needed is taken from the seller's 402 challenge: the EIP-712 token +// domain (name/version) from req.Extra, the verifying contract from req.Asset, +// the recipient from req.PayTo, the amount from req.Amount, and the chain id +// from req.Network (CAIP-2 "eip155:"). 
+func SignExactPayment(key *ecdsa.PrivateKey, req x402types.PaymentRequirements) (string, error) { + if key == nil { + return "", fmt.Errorf("x402: nil signing key") + } + chainID, err := chainIDFromNetwork(req.Network) + if err != nil { + return "", err + } + name, _ := req.Extra["name"].(string) + version, _ := req.Extra["version"].(string) + if name == "" || version == "" { + return "", fmt.Errorf("x402: 402 challenge missing asset EIP-712 name/version in accepts[].extra") + } + if req.Asset == "" || req.PayTo == "" || req.Amount == "" { + return "", fmt.Errorf("x402: 402 challenge missing asset/payTo/amount") + } + + from := ethcrypto.PubkeyToAddress(key.PublicKey).Hex() + + nonce := make([]byte, 32) + if _, err := rand.Read(nonce); err != nil { + return "", err + } + nonceHex := fmt.Sprintf("0x%x", nonce) + + now := time.Now() + validAfter := now.Add(-pastValidityBuffer).Unix() + if validAfter < 0 { + validAfter = 0 + } + window := time.Duration(req.MaxTimeoutSeconds) * time.Second + if window <= 0 { + window = time.Hour + } + validBefore := now.Add(window).Unix() + + auth := map[string]any{ + "from": from, + "to": req.PayTo, + "value": req.Amount, + "validAfter": strconv.FormatInt(validAfter, 10), + "validBefore": strconv.FormatInt(validBefore, 10), + "nonce": nonceHex, + } + + typedData := apitypes.TypedData{ + Types: apitypes.Types{ + "EIP712Domain": { + {Name: "name", Type: "string"}, + {Name: "version", Type: "string"}, + {Name: "chainId", Type: "uint256"}, + {Name: "verifyingContract", Type: "address"}, + }, + "TransferWithAuthorization": { + {Name: "from", Type: "address"}, + {Name: "to", Type: "address"}, + {Name: "value", Type: "uint256"}, + {Name: "validAfter", Type: "uint256"}, + {Name: "validBefore", Type: "uint256"}, + {Name: "nonce", Type: "bytes32"}, + }, + }, + PrimaryType: "TransferWithAuthorization", + Domain: apitypes.TypedDataDomain{ + Name: name, + Version: version, + ChainId: gethmath.NewHexOrDecimal256(chainID), + VerifyingContract: 
req.Asset, + }, + Message: apitypes.TypedDataMessage{ + "from": from, + "to": req.PayTo, + "value": req.Amount, + "validAfter": auth["validAfter"], + "validBefore": auth["validBefore"], + "nonce": nonceHex, + }, + } + + hash, _, err := apitypes.TypedDataAndHash(typedData) + if err != nil { + return "", fmt.Errorf("x402: hash typed data: %w", err) + } + sig, err := ethcrypto.Sign(hash, key) + if err != nil { + return "", fmt.Errorf("x402: sign: %w", err) + } + sig[64] += 27 // Ethereum v convention (27/28) + + payload := x402types.PaymentPayload{ + X402Version: 2, + Accepted: req, + Payload: map[string]any{ + "signature": fmt.Sprintf("0x%x", sig), + "authorization": auth, + }, + } + raw, err := json.Marshal(payload) + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(raw), nil +} + +// chainIDFromNetwork parses a CAIP-2 network ("eip155:84532") or a bare decimal +// into the numeric chain id. +func chainIDFromNetwork(network string) (int64, error) { + s := strings.TrimSpace(network) + if i := strings.LastIndex(s, ":"); i >= 0 { + s = s[i+1:] + } + n, err := strconv.ParseInt(strings.TrimSpace(s), 10, 64) + if err != nil || n <= 0 { + return 0, fmt.Errorf("x402: cannot derive chain id from network %q", network) + } + return n, nil +} diff --git a/internal/x402/clientsign_test.go b/internal/x402/clientsign_test.go new file mode 100644 index 00000000..bbaf04ac --- /dev/null +++ b/internal/x402/clientsign_test.go @@ -0,0 +1,107 @@ +package x402 + +import ( + "encoding/base64" + "encoding/json" + "testing" + + "github.com/ethereum/go-ethereum/common" + gethmath "github.com/ethereum/go-ethereum/common/math" + ethcrypto "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/signer/core/apitypes" + x402types "github.com/x402-foundation/x402/go/types" +) + +// SignExactPayment must produce a base64 X-PAYMENT whose EIP-712 signature +// recovers back to the signer's address — i.e. 
a real, verifiable payment a +// facilitator will accept — entirely host-side. +func TestSignExactPayment_RoundTrip(t *testing.T) { + key, err := ethcrypto.GenerateKey() + if err != nil { + t.Fatal(err) + } + from := ethcrypto.PubkeyToAddress(key.PublicKey).Hex() + + req := BuildV2Requirement(ChainBaseSepolia, "0.01", "0x1111111111111111111111111111111111111111", 0) + + hdr, err := SignExactPayment(key, req) + if err != nil { + t.Fatalf("SignExactPayment: %v", err) + } + + raw, err := base64.StdEncoding.DecodeString(hdr) + if err != nil { + t.Fatalf("decode header: %v", err) + } + var p x402types.PaymentPayload + if err := json.Unmarshal(raw, &p); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if p.X402Version != 2 || p.Accepted.PayTo != req.PayTo || p.Accepted.Amount != req.Amount { + t.Fatalf("payload accepted mismatch: %+v", p.Accepted) + } + authMap, _ := p.Payload["authorization"].(map[string]any) + sigHex, _ := p.Payload["signature"].(string) + if authMap["from"] != from { + t.Fatalf("authorization.from = %v, want signer %v", authMap["from"], from) + } + + chainID, _ := chainIDFromNetwork(req.Network) + typedData := apitypes.TypedData{ + Types: apitypes.Types{ + "EIP712Domain": { + {Name: "name", Type: "string"}, + {Name: "version", Type: "string"}, + {Name: "chainId", Type: "uint256"}, + {Name: "verifyingContract", Type: "address"}, + }, + "TransferWithAuthorization": { + {Name: "from", Type: "address"}, + {Name: "to", Type: "address"}, + {Name: "value", Type: "uint256"}, + {Name: "validAfter", Type: "uint256"}, + {Name: "validBefore", Type: "uint256"}, + {Name: "nonce", Type: "bytes32"}, + }, + }, + PrimaryType: "TransferWithAuthorization", + Domain: apitypes.TypedDataDomain{ + Name: req.Extra["name"].(string), + Version: req.Extra["version"].(string), + ChainId: gethmath.NewHexOrDecimal256(chainID), + VerifyingContract: req.Asset, + }, + Message: apitypes.TypedDataMessage{ + "from": authMap["from"], "to": authMap["to"], "value": authMap["value"], + 
"validAfter": authMap["validAfter"], "validBefore": authMap["validBefore"], "nonce": authMap["nonce"], + }, + } + hash, _, err := apitypes.TypedDataAndHash(typedData) + if err != nil { + t.Fatalf("hash: %v", err) + } + sig := common.FromHex(sigHex) + if len(sig) != 65 { + t.Fatalf("signature length = %d, want 65", len(sig)) + } + sig[64] -= 27 + pub, err := ethcrypto.SigToPub(hash, sig) + if err != nil { + t.Fatalf("SigToPub: %v", err) + } + if got := ethcrypto.PubkeyToAddress(*pub).Hex(); got != from { + t.Fatalf("recovered signer = %s, want %s — invalid EIP-712 signature", got, from) + } +} + +func TestSignExactPayment_RejectsIncompleteChallenge(t *testing.T) { + key, _ := ethcrypto.GenerateKey() + // A 402 with no asset EIP-712 name/version in extra cannot be signed. + bad := x402types.PaymentRequirements{ + Scheme: "exact", Network: "eip155:84532", + Asset: "0xabc", PayTo: "0xdef", Amount: "1000", + } + if _, err := SignExactPayment(key, bad); err == nil { + t.Fatal("expected error for a 402 missing EIP-712 name/version") + } +} From 271a9d93cfc78dc9cab067d6c7ad591b1ec98a1c Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Mon, 15 Jun 2026 22:35:27 +0400 Subject: [PATCH 06/11] refactor(cli): rename 'obol dataset' to 'obol sell data' Moves the seller-side dataset command group under 'obol sell' as 'data' (with 'dataset' kept as an alias). 'obol buy dataset' is unchanged. Updates help/error strings, the monetize-dataset guide, the dataset-anonymize skill, and the hf-surface smoke flow. 
--- cmd/obol/dataset.go | 45 ++++++++++--------- cmd/obol/main.go | 1 - cmd/obol/sell.go | 1 + docs/guides/monetize-dataset.md | 10 ++--- flows/hf-surface-smoke.sh | 6 +-- .../embed/skills/dataset-anonymize/SKILL.md | 6 +-- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go index 5957f96b..21373a4e 100644 --- a/cmd/obol/dataset.go +++ b/cmd/obol/dataset.go @@ -1,16 +1,16 @@ package main -// obol dataset — owner side of a versioned, membership-gated dataset offer. +// obol sell data — owner side of a versioned, membership-gated dataset offer. // -// obol dataset from --name ingest a bundle as a new -// signed version (creates v1). -// obol dataset version --bundle append the next signed version. -// obol dataset publish host the artifact server on -// this machine + a Cloudflare -// tunnel; gate every byte. -// obol dataset approve admit a worker (membership). -// obol dataset verify walk the signed version chain. -// obol dataset status versions + members. +// obol sell data from --name ingest a bundle as a new +// signed version (creates v1). +// obol sell data version --bundle append the next signed version. +// obol sell data publish host the artifact server on +// this machine + a Cloudflare +// tunnel; gate every byte. +// obol sell data approve admit a worker (membership). +// obol sell data verify walk the signed version chain. +// obol sell data status versions + members. 
// // The artifact server is the host gateway (same spirit as `obol sell // inference` / `obol research publish`): it runs on the owner's machine, never @@ -47,10 +47,11 @@ type datasetState struct { OwnerToken string `json:"owner_token"` } -func datasetCommand(cfg *config.Config) *cli.Command { +func sellDataCommand(cfg *config.Config) *cli.Command { return &cli.Command{ - Name: "dataset", - Usage: "Publish and sell versioned, membership-gated datasets", + Name: "data", + Aliases: []string{"dataset"}, + Usage: "Publish and sell versioned, membership-gated datasets", Commands: []*cli.Command{ datasetFromCommand(cfg), datasetVersionCommand(cfg), @@ -70,7 +71,7 @@ func datasetFromCommand(cfg *config.Config) *cli.Command { Flags: []cli.Flag{&cli.StringFlag{Name: "name", Usage: "Dataset id", Required: true}}, Action: func(_ context.Context, cmd *cli.Command) error { if cmd.NArg() != 1 { - return fmt.Errorf("bundle directory required: obol dataset from --name ") + return fmt.Errorf("bundle directory required: obol sell data from --name ") } return appendDatasetVersion(cfg, cmd, strings.TrimSpace(cmd.String("name")), cmd.Args().First()) }, @@ -85,11 +86,11 @@ func datasetVersionCommand(cfg *config.Config) *cli.Command { Flags: []cli.Flag{&cli.StringFlag{Name: "bundle", Usage: "New bundle directory", Required: true}}, Action: func(_ context.Context, cmd *cli.Command) error { if cmd.NArg() != 1 { - return fmt.Errorf("dataset id required: obol dataset version --bundle ") + return fmt.Errorf("dataset id required: obol sell data version --bundle ") } id := strings.TrimSpace(cmd.Args().First()) if _, err := os.Stat(datasetStorePath(cfg, id)); err != nil { - return fmt.Errorf("dataset %q not found — create it with 'obol dataset from'", id) + return fmt.Errorf("dataset %q not found — create it with 'obol sell data from'", id) } return appendDatasetVersion(cfg, cmd, id, cmd.String("bundle")) }, @@ -144,7 +145,7 @@ func appendDatasetVersion(cfg *config.Config, cmd *cli.Command, 
id, bundleDir st u.Infof("File hash: %s", v.FileHash) u.Infof("Size: %d bytes", v.Size) u.Infof("Owner: %s", signer.SignerID()) - u.Dim("Publish it with: obol dataset publish " + id) + u.Dim("Publish it with: obol sell data publish " + id) return nil } @@ -164,7 +165,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) if cmd.NArg() != 1 { - return fmt.Errorf("dataset id required: obol dataset publish ") + return fmt.Errorf("dataset id required: obol sell data publish ") } id := strings.TrimSpace(cmd.Args().First()) @@ -180,7 +181,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { return err } if len(st.Versions) == 0 { - return fmt.Errorf("dataset %q has no versions — run 'obol dataset from' first", id) + return fmt.Errorf("dataset %q has no versions — run 'obol sell data from' first", id) } // Never serve a chain we cannot verify against the owner key: a // tampered persisted store must fail closed, not be published. 
@@ -302,7 +303,7 @@ func datasetApproveCommand(cfg *config.Config) *cli.Command { Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) if cmd.NArg() != 1 { - return fmt.Errorf("user code required: obol dataset approve ") + return fmt.Errorf("user code required: obol sell data approve ") } st, err := loadDatasetState(cfg, cmd.String("dataset")) if err != nil { @@ -332,7 +333,7 @@ func datasetVerifyCommand(cfg *config.Config) *cli.Command { Action: func(_ context.Context, cmd *cli.Command) error { u := getUI(cmd) if cmd.NArg() != 1 { - return fmt.Errorf("dataset id required: obol dataset verify ") + return fmt.Errorf("dataset id required: obol sell data verify ") } id := strings.TrimSpace(cmd.Args().First()) key, err := dataset.LoadOrCreateKey(datasetKeyPath(cfg, id)) @@ -363,7 +364,7 @@ func datasetStatusCommand(cfg *config.Config) *cli.Command { Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) if cmd.NArg() != 1 { - return fmt.Errorf("dataset id required: obol dataset status ") + return fmt.Errorf("dataset id required: obol sell data status ") } id := strings.TrimSpace(cmd.Args().First()) st, err := dataset.NewStore(datasetStorePath(cfg, id)).Load() diff --git a/cmd/obol/main.go b/cmd/obol/main.go index e3c33eef..e4876caf 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -330,7 +330,6 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}} bountyCommand(cfg), smokeCommand(cfg), researchCommand(cfg), - datasetCommand(cfg), modelCommand(cfg), { Name: "app", diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 6e2b661f..44a9faea 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -67,6 +67,7 @@ func sellCommand(cfg *config.Config) *cli.Command { sellIdentityCommand(cfg), sellInfoCommand(cfg), sellResumeCommand(cfg), + sellDataCommand(cfg), }, } } diff --git a/docs/guides/monetize-dataset.md b/docs/guides/monetize-dataset.md index e7d33266..af86d55d 100644 --- a/docs/guides/monetize-dataset.md +++ 
b/docs/guides/monetize-dataset.md @@ -35,24 +35,24 @@ the `dataset-anonymize` skill. ## 2. Record a signed version ```bash -obol dataset from my-bundle --name pi-sessions +obol sell data from my-bundle --name pi-sessions ``` This reads the bundle, computes the artifact's whole-file SHA-256, and appends a **signed** `DatasetVersion` (v1) to the dataset's version log — chained to its predecessor, signed by your owner key (the address buyers pin). Append a new -snapshot later with `obol dataset version pi-sessions --bundle my-bundle-v2`. +snapshot later with `obol sell data version pi-sessions --bundle my-bundle-v2`. Walk the chain offline at any time: ```bash -obol dataset verify pi-sessions # rejects any reorder/tamper/middle-removal +obol sell data verify pi-sessions # rejects any reorder/tamper/middle-removal ``` ## 3. Publish (host + tunnel + gate) ```bash -obol dataset publish pi-sessions --membership invite +obol sell data publish pi-sessions --membership invite ``` Starts the artifact server on your machine and a Cloudflare tunnel. **Bytes @@ -64,7 +64,7 @@ alike. Two ways a caller holds a member token: - **Pre-approved worker** — joins via device-auth; you run - `obol dataset approve `. Gets full (head) access. + `obol sell data approve `. Gets full (head) access. - **Anonymous market buyer** — pays the priced offer; the edge x402 verifier proves the settled payment, and the server mints a token scoped to exactly the version paid for (`/join/paid`). 
Payment *is* the approval; the dataset diff --git a/flows/hf-surface-smoke.sh b/flows/hf-surface-smoke.sh index 0dcb4e1c..8947c587 100755 --- a/flows/hf-surface-smoke.sh +++ b/flows/hf-surface-smoke.sh @@ -77,8 +77,8 @@ fi HASH=$(shasum -a 256 "$BUNDLE/sft.jsonl" | awk '{print $1}') printf '{"hash":"%s","files":["sft.jsonl"]}\n' "$HASH" > "$BUNDLE/manifest.json" -if "$OBOL_BIN" dataset from "$BUNDLE" --name "$DS_ID" >/dev/null 2>&1 \ - && "$OBOL_BIN" dataset verify "$DS_ID" 2>&1 | grep -q 'Chain valid'; then +if "$OBOL_BIN" sell data from "$BUNDLE" --name "$DS_ID" >/dev/null 2>&1 \ + && "$OBOL_BIN" sell data verify "$DS_ID" 2>&1 | grep -q 'Chain valid'; then pass "1b sign + verify — signed version chain valid" else fail "1b sign+verify" "version not recorded or chain invalid" @@ -86,7 +86,7 @@ fi MANIFEST_HASH=$(python3 -c "import json;print(json.load(open('$OBOL_CONFIG_DIR/dataset-serve/$DS_ID.store.json'))['versions'][0]['manifestHash'])") -"$OBOL_BIN" dataset publish "$DS_ID" --membership open --port "$DS_PORT" --no-tunnel >/dev/null 2>&1 & +"$OBOL_BIN" sell data publish "$DS_ID" --membership open --port "$DS_PORT" --no-tunnel >/dev/null 2>&1 & curl -sf --retry 25 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$DS_PORT/healthz" >/dev/null OWNER=$(python3 -c "import json;print(json.load(open('$OBOL_CONFIG_DIR/dataset-serve/$DS_ID.state.json'))['owner_token'])" 2>/dev/null) diff --git a/internal/embed/skills/dataset-anonymize/SKILL.md b/internal/embed/skills/dataset-anonymize/SKILL.md index d811cdc7..be8e8d0d 100644 --- a/internal/embed/skills/dataset-anonymize/SKILL.md +++ b/internal/embed/skills/dataset-anonymize/SKILL.md @@ -1,6 +1,6 @@ --- name: dataset-anonymize -description: Anonymize a dataset's JSONL (PII detection + masking) before publishing or selling it with `obol dataset`. Pluggable detector — built-in regex redactor by default, or a BYO Hugging Face token-classification model. 
+description: Anonymize a dataset's JSONL (PII detection + masking) before publishing or selling it with `obol sell data`. Pluggable detector — built-in regex redactor by default, or a BYO Hugging Face token-classification model. --- # dataset-anonymize @@ -38,8 +38,8 @@ export OBOL_ANONYMIZER_MODEL="/" python3 scripts/anonymize.py input.jsonl anonymized.jsonl --report # Then ingest the anonymized bundle and publish it: -obol dataset from --name my-dataset -obol dataset publish my-dataset +obol sell data from --name my-dataset +obol sell data publish my-dataset ``` Each input line is a JSON object; the script masks string values under From c5beb5c349d8b5fea2a9ed0723fe5b063b1789ca Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:36:16 +0400 Subject: [PATCH 07/11] refactor(offerkind): declarative per-type integrity-profile registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit internal/offerkind is the single source of truth for what each ServiceOffer/Request type means — render/discovery shapes, price slots, capability flags, and a declarative IntegrityProfile per type. Routes x402 storefront copy + bazaar, the OpenAPI path shape, and the verifier's 402 integrity-metadata dispatch through it; centralizes price-slot detection in monetizeapi.Price.RawAndSlot(); adds a buy-side owner-pin nudge and a CRD-enum drift guard. Behavior-preserving. 
--- cmd/obol/dataset.go | 10 ++ internal/monetizeapi/types.go | 21 +++ internal/offerkind/offerkind.go | 195 +++++++++++++++++++++ internal/offerkind/offerkind_test.go | 116 ++++++++++++ internal/serviceoffercontroller/openapi.go | 7 +- internal/serviceoffercontroller/render.go | 13 +- internal/x402/bazaar.go | 6 +- internal/x402/mergetyped_test.go | 47 +++++ internal/x402/paymentrequired.go | 19 +- internal/x402/serviceoffer_source.go | 29 +-- internal/x402/verifier.go | 29 ++- 11 files changed, 441 insertions(+), 51 deletions(-) create mode 100644 internal/offerkind/offerkind.go create mode 100644 internal/offerkind/offerkind_test.go create mode 100644 internal/x402/mergetyped_test.go diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go index 21373a4e..d17d149f 100644 --- a/cmd/obol/dataset.go +++ b/cmd/obol/dataset.go @@ -34,6 +34,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/dataset" + "github.com/ObolNetwork/obol-stack/internal/offerkind" x402 "github.com/ObolNetwork/obol-stack/internal/x402" "github.com/urfave/cli/v3" x402types "github.com/x402-foundation/x402/go/types" @@ -448,6 +449,15 @@ func buyDatasetCommand(cfg *config.Config) *cli.Command { } out = fmt.Sprintf("%s-v%d.jsonl", id, v) } + // A dataset's integrity profile mandates signed-log content + // integrity, but the signed chain only proves it is self-consistent + // — not WHO signed it. Without an --owner pin, a seller that swapped + // its signing key still verifies, so surface the gap (the profile + // declares this check required for the guarantee to hold). + if offerkind.Resolve("dataset").Integrity.Content == offerkind.ContentSignedVersionLog && + strings.TrimSpace(cmd.String("owner")) == "" { + u.Warnf("No --owner pin: the signed version log proves chain consistency but not the signer's identity. 
Pass --owner 0x for full content integrity.") + } u.Infof("Fetching %s (version %v) → %s", id, orHead(cmd.Int("version")), out) res, err := dataset.Fetch(ctx, dataset.FetchOptions{ BaseURL: base, ID: id, Version: cmd.Int("version"), diff --git a/internal/monetizeapi/types.go b/internal/monetizeapi/types.go index 7ab6c077..4c0ad593 100644 --- a/internal/monetizeapi/types.go +++ b/internal/monetizeapi/types.go @@ -402,6 +402,27 @@ type ServiceOfferPriceTable struct { PerMB string `json:"perMB,omitempty"` } +// RawAndSlot returns the raw decimal price string and which price slot is +// populated, in precedence order (perRequest > perMTok > perHour > perMB). It +// is the single source of truth for price-slot detection, shared by the +// catalog renderer (serviceoffercontroller) and the verifier's effectivePrice +// (internal/x402). PerEpoch is intentionally not surfaced — no caller enforces +// it yet, so including it would change behavior. An empty slot means no price. +func (p ServiceOfferPriceTable) RawAndSlot() (raw, slot string) { + switch { + case p.PerRequest != "": + return p.PerRequest, "perRequest" + case p.PerMTok != "": + return p.PerMTok, "perMTok" + case p.PerHour != "": + return p.PerHour, "perHour" + case p.PerMB != "": + return p.PerMB, "perMB" + default: + return "", "" + } +} + type ServiceOfferRegistration struct { // If true, register on ERC-8004 after routing is live. // +kubebuilder:default=false diff --git a/internal/offerkind/offerkind.go b/internal/offerkind/offerkind.go new file mode 100644 index 00000000..59dbf05d --- /dev/null +++ b/internal/offerkind/offerkind.go @@ -0,0 +1,195 @@ +// Package offerkind is the single source of truth for what a ServiceOffer / +// ServiceRequest "type" means: how it renders (storefront copy, bazaar and +// OpenAPI discovery shapes), which price slot it uses, and — crucially — +// which integrity checks apply to it. 
+// +// It replaces the type-collapse logic that was previously duplicated across +// packages: internal/x402 (normalizeOfferType) and +// internal/serviceoffercontroller (fallbackOfferType, openAPIPathsForOffer, +// offerPriceRawAndUnit). Each of those re-implemented "given a spec.type, +// what shape is this" with subtly different defaults. Centralizing here means +// adding a 7th service type is a single table entry instead of an 8-file sweep. +// +// Design: this is a ZERO-DEPENDENCY leaf package (stdlib only). Call sites pass +// the raw spec.type string (offer.Spec.Type), never a CRD struct, so both x402 +// and the controller can import it with no risk of an import cycle. The data +// table mirrors how internal/bounty/registry.go makes task types data-driven. +// +// Integrity is kept strictly separate from pricing/upstream/rendering: the +// IntegrityProfile declares only authenticity/identity/scope obligations, not +// "is the price valid" (that is a pricing concern, not an integrity one). +package offerkind + +// PaymentClass is the payment-integrity obligation for a type. Payment +// verification itself is uniform x402 "exact"; the only real axis is whether a +// payment proof is required at all (it always is, today) — method (card vs +// crypto) is handled separately by the verifier, not here. +type PaymentClass string + +const ( + PaymentX402Exact PaymentClass = "x402-exact" + PaymentNone PaymentClass = "none" +) + +// ContentClass is the data-authenticity obligation: how a buyer proves the +// bytes it received are the bytes the seller committed to. +type ContentClass string + +const ( + ContentNone ContentClass = "none" + ContentSignedVersionLog ContentClass = "signed-version-log" // dataset / fine-tuning: owner-signed secp256k1 hash-chain (internal/dataset) + ContentBundleSHA256 ContentClass = "bundle-sha256" // skill: controller-validated bundle hash+size +) + +// IdentityClass is the caller-identity / membership obligation. 
+type IdentityClass string + +const ( + IdentityNone IdentityClass = "none" + IdentityGroupAuth IdentityClass = "groupauth" // membership-gated via internal/research/groupauth +) + +// ScopeClass is the entitlement-scope obligation layered on top of membership. +type ScopeClass string + +const ( + ScopeNone ScopeClass = "none" + ScopeVersionEntitlement ScopeClass = "version-entitlement" // dataset: token entitled only up to a paid version +) + +// IntegrityProfile declares the integrity checks a service type requires. +// Consumed by the verifier and controller (to enforce) and by the buy-side +// (to know what to verify before trusting a response). +type IntegrityProfile struct { + Payment PaymentClass + Content ContentClass + Identity IdentityClass + Scope ScopeClass +} + +// Kind is the resolved capability + integrity descriptor for one spec.type. +type Kind struct { + // Type is the canonical spec.type string this entry represents ("" for the + // unset default). Resolve(unknown) returns the http Kind, so its Type is + // "http", not the unknown input. + Type string + + // PaymentCopy collapses the type into the three storefront-copy branches + // ("inference" | "agent" | "http"). Replaces x402.normalizeOfferType. + PaymentCopy string + // BazaarShape is the x402 bazaar discovery shape ("chat" | "generic"). + BazaarShape string + // OpenAPIShape is the controller's OpenAPI path shape + // ("chat" | "multipart" | "generic"). + OpenAPIShape string + // CatalogType is the display/catalog label (fallbackOfferType): the type + // string, or "http" when unset. + CatalogType string + + // PriceUnits lists the price slot(s) a type conventionally uses, in + // precedence order. Informational/validation; the live price reader still + // keys off whichever Price.* field is populated. + PriceUnits []string + + // SemanticInference mirrors monetizeapi.(*ServiceOffer).IsInference(): true + // for "" and "inference". 
Drives model-reconciliation gating and the + // OpenAPI empty-type edge (IsInference("")==true → chat shape). + SemanticInference bool + // ResolvesAgentRef: upstream comes from an Agent CR status, not spec. + ResolvesAgentRef bool + // RendersBundle: controller renders a skill bundle server. + RendersBundle bool + // OneShotPurchase: price is a total (e.g. perMB × size), not a rate. + OneShotPurchase bool + + Integrity IntegrityProfile +} + +// paymentOnly is the integrity profile for inference/http/agent: an x402 +// payment proof, nothing else. +var paymentOnly = IntegrityProfile{ + Payment: PaymentX402Exact, + Content: ContentNone, + Identity: IdentityNone, + Scope: ScopeNone, +} + +// kinds is the table. Keys are spec.type strings; "" is the unset default and +// is deliberately distinct from "inference" because the legacy code treats the +// empty type inconsistently — http-presentational (normalizeOfferType, +// fallbackOfferType) yet inference-semantic (IsInference, openAPIPathsForOffer). +// Encoding both faithfully keeps this refactor behavior-preserving. 
+var kinds = map[string]Kind{ + "": { + Type: "", PaymentCopy: "http", BazaarShape: "generic", OpenAPIShape: "chat", + CatalogType: "http", PriceUnits: []string{"perRequest", "perMTok"}, + SemanticInference: true, Integrity: paymentOnly, + }, + "inference": { + Type: "inference", PaymentCopy: "inference", BazaarShape: "chat", OpenAPIShape: "chat", + CatalogType: "inference", PriceUnits: []string{"perRequest", "perMTok"}, + SemanticInference: true, Integrity: paymentOnly, + }, + "http": { + Type: "http", PaymentCopy: "http", BazaarShape: "generic", OpenAPIShape: "generic", + CatalogType: "http", PriceUnits: []string{"perRequest"}, + Integrity: paymentOnly, + }, + "agent": { + Type: "agent", PaymentCopy: "agent", BazaarShape: "chat", OpenAPIShape: "chat", + CatalogType: "agent", PriceUnits: []string{"perRequest", "perMTok"}, + ResolvesAgentRef: true, Integrity: paymentOnly, + }, + "dataset": { + Type: "dataset", PaymentCopy: "http", BazaarShape: "generic", OpenAPIShape: "generic", + CatalogType: "dataset", PriceUnits: []string{"perMB"}, + OneShotPurchase: true, + Integrity: IntegrityProfile{ + Payment: PaymentX402Exact, + Content: ContentSignedVersionLog, + Identity: IdentityGroupAuth, + Scope: ScopeVersionEntitlement, + }, + }, + "fine-tuning": { + Type: "fine-tuning", PaymentCopy: "http", BazaarShape: "generic", OpenAPIShape: "multipart", + CatalogType: "fine-tuning", PriceUnits: []string{"perHour"}, + Integrity: IntegrityProfile{ + Payment: PaymentX402Exact, + Content: ContentSignedVersionLog, // reuses the dataset signed-log primitives + }, + }, + "skill": { + Type: "skill", PaymentCopy: "http", BazaarShape: "generic", OpenAPIShape: "generic", + CatalogType: "skill", PriceUnits: []string{"perRequest"}, + RendersBundle: true, + Integrity: IntegrityProfile{ + Payment: PaymentX402Exact, + Content: ContentBundleSHA256, + }, + }, +} + +// Resolve returns the Kind for a spec.type string. 
An unrecognized non-empty +// type falls back to the http Kind (payment-only, generic shapes) — matching +// the legacy normalizeOfferType / openAPIPathsForOffer defaults for unknown +// types. The empty string resolves to its own dedicated entry. +func Resolve(t string) Kind { + if k, ok := kinds[t]; ok { + return k + } + return kinds["http"] +} + +// Types returns the canonical service-type strings the registry knows +// (excluding the "" default), for drift checks against the CRD enum. +func Types() []string { + out := make([]string, 0, len(kinds)) + for k := range kinds { + if k == "" { + continue + } + out = append(out, k) + } + return out +} diff --git a/internal/offerkind/offerkind_test.go b/internal/offerkind/offerkind_test.go new file mode 100644 index 00000000..a3efc8e8 --- /dev/null +++ b/internal/offerkind/offerkind_test.go @@ -0,0 +1,116 @@ +package offerkind + +import ( + "os" + "strings" + "testing" +) + +// TestResolve_CoversCRDEnum is the drift guard: every value in the +// ServiceOffer.spec.type CRD enum (monetizeapi/types.go) must have a Kind. +// Adding a 7th type to the enum without a table entry fails here, the same way +// TestOpenClawVersionConsistency catches version drift. 
+func TestResolve_CoversCRDEnum(t *testing.T) { + src, err := os.ReadFile("../monetizeapi/types.go") + if err != nil { + t.Fatalf("read types.go: %v", err) + } + var enumLine string + for _, ln := range strings.Split(string(src), "\n") { + if strings.Contains(ln, "+kubebuilder:validation:Enum=") && strings.Contains(ln, "inference") { + enumLine = ln + break + } + } + if enumLine == "" { + t.Fatal("could not find the ServiceOfferSpec.Type enum in monetizeapi/types.go") + } + _, rhs, _ := strings.Cut(enumLine, "Enum=") + values := strings.Split(strings.TrimSpace(rhs), ";") + if len(values) < 6 { + t.Fatalf("expected >=6 enum values, got %d from %q", len(values), rhs) + } + for _, v := range values { + v = strings.TrimSpace(v) + k := Resolve(v) + if k.Type != v { + t.Errorf("CRD enum value %q has no offerkind Kind (Resolve→Type %q); add it to kinds", v, k.Type) + } + if k.Integrity.Payment == "" { + t.Errorf("type %q has an empty Payment class", v) + } + } +} + +// TestResolve_LegacyCollapseValues locks the exact collapse values the legacy +// normalizeOfferType / openAPIPathsForOffer produced, so the rewire stays +// behavior-preserving — including the deliberate "" split (generic bazaar but +// chat openapi, because IsInference("")==true). 
+func TestResolve_LegacyCollapseValues(t *testing.T) { + cases := []struct{ typ, paymentCopy, bazaar, openapi string }{ + {"", "http", "generic", "chat"}, + {"inference", "inference", "chat", "chat"}, + {"http", "http", "generic", "generic"}, + {"agent", "agent", "chat", "chat"}, + {"dataset", "http", "generic", "generic"}, + {"fine-tuning", "http", "generic", "multipart"}, + {"skill", "http", "generic", "generic"}, + {"totally-unknown", "http", "generic", "generic"}, + } + for _, c := range cases { + k := Resolve(c.typ) + if k.PaymentCopy != c.paymentCopy { + t.Errorf("Resolve(%q).PaymentCopy = %q, want %q", c.typ, k.PaymentCopy, c.paymentCopy) + } + if k.BazaarShape != c.bazaar { + t.Errorf("Resolve(%q).BazaarShape = %q, want %q", c.typ, k.BazaarShape, c.bazaar) + } + if k.OpenAPIShape != c.openapi { + t.Errorf("Resolve(%q).OpenAPIShape = %q, want %q", c.typ, k.OpenAPIShape, c.openapi) + } + } +} + +func TestResolve_IntegrityProfiles(t *testing.T) { + if got := Resolve("inference").Integrity; got != paymentOnly { + t.Errorf("inference integrity = %+v, want payment-only", got) + } + if got := Resolve("http").Integrity; got != paymentOnly { + t.Errorf("http integrity = %+v, want payment-only", got) + } + ds := Resolve("dataset").Integrity + if ds.Content != ContentSignedVersionLog || ds.Scope != ScopeVersionEntitlement || ds.Identity != IdentityGroupAuth { + t.Errorf("dataset integrity = %+v, want signed-log + version-entitlement + groupauth", ds) + } + if got := Resolve("skill").Integrity.Content; got != ContentBundleSHA256 { + t.Errorf("skill content = %q, want bundle-sha256", got) + } + if got := Resolve("fine-tuning").Integrity.Content; got != ContentSignedVersionLog { + t.Errorf("fine-tuning content = %q, want signed-version-log", got) + } +} + +func TestResolve_SemanticInference(t *testing.T) { + for _, typ := range []string{"", "inference"} { + if !Resolve(typ).SemanticInference { + t.Errorf("Resolve(%q).SemanticInference = false, want true (matches 
IsInference)", typ) + } + } + for _, typ := range []string{"http", "agent", "dataset", "fine-tuning", "skill"} { + if Resolve(typ).SemanticInference { + t.Errorf("Resolve(%q).SemanticInference = true, want false", typ) + } + } +} + +func TestResolve_CapabilityFlags(t *testing.T) { + if !Resolve("agent").ResolvesAgentRef { + t.Error("agent should resolve an Agent ref") + } + if !Resolve("skill").RendersBundle { + t.Error("skill should render a bundle") + } + if !Resolve("dataset").OneShotPurchase { + t.Error("dataset is a one-shot purchase (perMB→total)") + } +} diff --git a/internal/serviceoffercontroller/openapi.go b/internal/serviceoffercontroller/openapi.go index 5e6a9688..9024de44 100644 --- a/internal/serviceoffercontroller/openapi.go +++ b/internal/serviceoffercontroller/openapi.go @@ -7,6 +7,7 @@ import ( "time" "github.com/ObolNetwork/obol-stack/internal/monetizeapi" + "github.com/ObolNetwork/obol-stack/internal/offerkind" "github.com/ObolNetwork/obol-stack/internal/schemas" ) @@ -201,8 +202,8 @@ func openAPIPathsForOffer(offer *monetizeapi.ServiceOffer) map[string]map[string if offer == nil { return nil } - switch { - case offer.IsInference(), offer.IsAgent(): + switch offerkind.Resolve(offer.Spec.Type).OpenAPIShape { + case "chat": return map[string]map[string]any{ "/v1/chat/completions": { "post": openAPIOperation(offer, openAPIOperationOptions{ @@ -219,7 +220,7 @@ func openAPIPathsForOffer(offer *monetizeapi.ServiceOffer) map[string]map[string }), }, } - case strings.EqualFold(offer.Spec.Type, "fine-tuning"): + case "multipart": return map[string]map[string]any{ "": { "post": openAPIOperation(offer, openAPIOperationOptions{ diff --git a/internal/serviceoffercontroller/render.go b/internal/serviceoffercontroller/render.go index 72c467e2..d0a989a3 100644 --- a/internal/serviceoffercontroller/render.go +++ b/internal/serviceoffercontroller/render.go @@ -1194,18 +1194,7 @@ func buildServiceCatalogJSON(offers []*monetizeapi.ServiceOffer, baseURL string) // 
occupies in the price table. Only one of perRequest / perMTok / perHour / // perMB is expected to be set on a given offer. func offerPriceRawAndUnit(offer *monetizeapi.ServiceOffer) (string, string) { - switch { - case offer.Spec.Payment.Price.PerRequest != "": - return offer.Spec.Payment.Price.PerRequest, "perRequest" - case offer.Spec.Payment.Price.PerMTok != "": - return offer.Spec.Payment.Price.PerMTok, "perMTok" - case offer.Spec.Payment.Price.PerHour != "": - return offer.Spec.Payment.Price.PerHour, "perHour" - case offer.Spec.Payment.Price.PerMB != "": - return offer.Spec.Payment.Price.PerMB, "perMB" - default: - return "", "" - } + return offer.Spec.Payment.Price.RawAndSlot() } // offerAssetJSON resolves the settlement asset block. If the offer carries an diff --git a/internal/x402/bazaar.go b/internal/x402/bazaar.go index db3bf8ee..348cf66a 100644 --- a/internal/x402/bazaar.go +++ b/internal/x402/bazaar.go @@ -1,5 +1,7 @@ package x402 +import "github.com/ObolNetwork/obol-stack/internal/offerkind" + // The x402 v2 `bazaar` discovery extension (specs/extensions/bazaar.md in // x402-foundation/x402). Every paid route advertises {info, schema} in the // 402 response's `extensions.bazaar` so facilitators and indexers @@ -53,8 +55,8 @@ func WithBazaar(extensions map[string]any, offerType, model string) map[string]a // openAPIPathsForOffer in internal/serviceoffercontroller); everything else // gets the generic operator-defined JSON shape. 
func BuildBazaarExtension(offerType, model string) map[string]any { - switch normalizeOfferType(offerType) { - case "inference", "agent": + switch offerkind.Resolve(offerType).BazaarShape { + case "chat": return bazaarChatCompletions(model) default: return bazaarGenericJSON() diff --git a/internal/x402/mergetyped_test.go b/internal/x402/mergetyped_test.go new file mode 100644 index 00000000..846c300d --- /dev/null +++ b/internal/x402/mergetyped_test.go @@ -0,0 +1,47 @@ +package x402 + +import ( + "testing" + + x402types "github.com/x402-foundation/x402/go/types" +) + +// TestMergeTypedExtras_ProfileDriven verifies the 402 discovery metadata is +// selected by the offer type's integrity profile (offerkind), not by which +// RouteRule fields happen to be populated. +func TestMergeTypedExtras_ProfileDriven(t *testing.T) { + // A signed-log dataset route surfaces its content commitment. + req := x402types.PaymentRequirements{} + mergeTypedExtras(&req, &RouteRule{OfferType: "dataset", DatasetManifestHash: "abc", DatasetFileHash: "def"}) + if _, ok := req.Extra["dataset"]; !ok { + t.Error("dataset route should surface extra.dataset") + } + + // A skill route surfaces its bundle identity. + req = x402types.PaymentRequirements{} + mergeTypedExtras(&req, &RouteRule{OfferType: "skill", SkillName: "buy-x402", SkillSHA256: "deadbeef"}) + if _, ok := req.Extra["skill"]; !ok { + t.Error("skill route should surface extra.skill") + } + + // An agent route surfaces its model. + req = x402types.PaymentRequirements{} + mergeTypedExtras(&req, &RouteRule{OfferType: "agent", AgentModel: "qwen"}) + if req.Extra["agentModel"] != "qwen" { + t.Errorf("agent route should surface agentModel, got %v", req.Extra["agentModel"]) + } + + // An inference route carrying a STRAY dataset field surfaces NOTHING: the + // dispatch is profile-driven (inference is payment-only, ContentNone), so a + // leaked field is ignored. 
This is stronger than the old unconditional + // merge, yet identical for every real route — serviceoffer_source.go only + // populates a type's fields when the offer is that type. + req = x402types.PaymentRequirements{} + mergeTypedExtras(&req, &RouteRule{OfferType: "inference", DatasetManifestHash: "should-be-ignored"}) + if _, ok := req.Extra["dataset"]; ok { + t.Error("inference route must not surface dataset extras even if a field leaks") + } + if len(req.Extra) != 0 { + t.Errorf("payment-only route should attach no typed extras, got %v", req.Extra) + } +} diff --git a/internal/x402/paymentrequired.go b/internal/x402/paymentrequired.go index 81853874..25b0ad86 100644 --- a/internal/x402/paymentrequired.go +++ b/internal/x402/paymentrequired.go @@ -11,6 +11,7 @@ import ( "regexp" "strings" + "github.com/ObolNetwork/obol-stack/internal/offerkind" x402types "github.com/x402-foundation/x402/go/types" ) @@ -334,7 +335,7 @@ type typeCopy struct { func buildTypeCopy(siteURL, endpoint string, d PaymentDisplay) typeCopy { url := siteURL + endpoint - switch normalizeOfferType(d.OfferType) { + switch offerkind.Resolve(d.OfferType).PaymentCopy { case "inference": return inferenceCopy(url, d) case "agent": @@ -344,22 +345,6 @@ func buildTypeCopy(siteURL, endpoint string, d PaymentDisplay) typeCopy { } } -// normalizeOfferType collapses the spec.type values into the three render -// branches. Empty falls back to "inference" historically (the original -// default), but the storefront defaults new offers to "http" — match that -// behavior here so unknown/unset types stay on the safest (single-shot pay) -// CTA. -func normalizeOfferType(t string) string { - switch t { - case "inference": - return "inference" - case "agent": - return "agent" - default: - return "http" - } -} - // inferenceCopy: primary CTA is `obol buy inference`, the CLI command that // pre-authorizes the seller and registers the model as `paid/` in the // local LiteLLM gateway. 
Secondary cards still expose the agent-prompt and diff --git a/internal/x402/serviceoffer_source.go b/internal/x402/serviceoffer_source.go index 6c9925d7..5eb8343c 100644 --- a/internal/x402/serviceoffer_source.go +++ b/internal/x402/serviceoffer_source.go @@ -238,24 +238,25 @@ func routeRuleFromOffer(offer *monetizeapi.ServiceOffer, upstreamAuth string) (R } func effectivePrice(offer *monetizeapi.ServiceOffer) (price, priceModel, perMTok string, approx int, err error) { - switch { - case offer.Spec.Payment.Price.PerRequest != "": - return offer.Spec.Payment.Price.PerRequest, "perRequest", "", 0, nil - case offer.Spec.Payment.Price.PerMTok != "": - price, err := schemas.ApproximateRequestPriceFromPerMTok(offer.Spec.Payment.Price.PerMTok) - if err != nil { - return "", "", "", 0, fmt.Errorf("invalid perMTok price %q: %w", offer.Spec.Payment.Price.PerMTok, err) + raw, slot := offer.Spec.Payment.Price.RawAndSlot() + switch slot { + case "perRequest": + return raw, "perRequest", "", 0, nil + case "perMTok": + approxPrice, perr := schemas.ApproximateRequestPriceFromPerMTok(raw) + if perr != nil { + return "", "", "", 0, fmt.Errorf("invalid perMTok price %q: %w", raw, perr) } - return price, "perMTok", offer.Spec.Payment.Price.PerMTok, schemas.ApproxTokensPerRequest, nil - case offer.Spec.Payment.Price.PerHour != "": - return offer.Spec.Payment.Price.PerHour, "perHour", "", 0, nil - case offer.Spec.Payment.Price.PerMB != "": + return approxPrice, "perMTok", raw, schemas.ApproxTokensPerRequest, nil + case "perHour": + return raw, "perHour", "", 0, nil + case "perMB": // A dataset is bought once, so the enforced per-request price is the // TOTAL: perMB × (sizeBytes / 1e6). Returning the raw perMB would charge // a single megabyte's worth for the entire dataset. 
- total, err := schemas.TotalPriceFromPerMB(offer.Spec.Payment.Price.PerMB, offer.Spec.Dataset.SizeBytes) - if err != nil { - return "", "", "", 0, fmt.Errorf("invalid perMB price %q: %w", offer.Spec.Payment.Price.PerMB, err) + total, terr := schemas.TotalPriceFromPerMB(raw, offer.Spec.Dataset.SizeBytes) + if terr != nil { + return "", "", "", 0, fmt.Errorf("invalid perMB price %q: %w", raw, terr) } return total, "perMB", "", 0, nil default: diff --git a/internal/x402/verifier.go b/internal/x402/verifier.go index 22f285f1..f8c30b0e 100644 --- a/internal/x402/verifier.go +++ b/internal/x402/verifier.go @@ -12,6 +12,7 @@ import ( "strings" "sync/atomic" + "github.com/ObolNetwork/obol-stack/internal/offerkind" "github.com/prometheus/client_golang/prometheus" x402types "github.com/x402-foundation/x402/go/types" ) @@ -351,9 +352,7 @@ func (v *Verifier) matchPaidRouteFull(cfg *PricingConfig, uri string) (*RouteRul asset := ResolveAssetInfo(chain, rule) requirement := BuildV2RequirementWithAsset(chain, asset, rule.Price, wallet, rule.MaxTimeoutSeconds) - mergeAgentExtras(&requirement, rule) - mergeDatasetExtras(&requirement, rule) - mergeSkillExtras(&requirement, rule) + mergeTypedExtras(&requirement, rule) extensions := WithBazaar(BuildExtensionsForAsset(asset), rule.OfferType, rule.Model) return rule, requirement, extensions, prometheusLabels(rule), chain, asset, true } @@ -467,6 +466,30 @@ func mergeSkillExtras(req *x402types.PaymentRequirements, rule *RouteRule) { req.Extra["skill"] = skill } +// mergeTypedExtras attaches the 402 discovery metadata selected by the offer +// type's integrity profile (offerkind): agent routes surface model/skills; +// content-bearing types surface their content commitment — a signed-log +// dataset's {manifestHash,version,fileHash,sizeBytes} or a skill bundle's +// {name,version,sha256}. 
The per-merge helpers still self-gate on empty fields +// as a backstop, so this stays behavior-preserving given the invariant that +// serviceoffer_source.go only populates a type's RouteRule fields when the +// offer is that type. Driving the dispatch from the declared profile — instead +// of calling every merge unconditionally — makes "which integrity metadata a +// type carries" explicit and centrally declared rather than implied by which +// fields happen to be set. +func mergeTypedExtras(req *x402types.PaymentRequirements, rule *RouteRule) { + kind := offerkind.Resolve(rule.OfferType) + if kind.ResolvesAgentRef { + mergeAgentExtras(req, rule) + } + switch kind.Integrity.Content { + case offerkind.ContentSignedVersionLog: + mergeDatasetExtras(req, rule) + case offerkind.ContentBundleSHA256: + mergeSkillExtras(req, rule) + } +} + // buildPaymentDisplay turns the matched rule + chain + asset into pre-formatted // strings for the HTML 402 page. The atomic-amount input is the value already // computed for the wire requirement (rule.Price * 10^decimals), so passing From b11330c2af490bdfa2d2d0f16bc21ea9a88a2558 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 12:32:59 +0400 Subject: [PATCH 08/11] fix(x402): base-sepolia USDC EIP-712 domain name is "USDC", with offline guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Base-Sepolia USDC (FiatTokenV2_2) signs its EIP-712 domain under name "USDC", not the mainnet "USD Coin" — verified: the on-chain DOMAIN_SEPARATOR() equals the domain built with "USDC". chains.go advertised "USD Coin", so the 402 a standalone seller emits made every host-side EIP-3009 signature fail a REAL facilitator (the cluster buyer buy.py and the catalog renderer already hardcoded "USDC", which is why only host-side buyers broke and the stub facilitator masked it). 
Two offline guards so it cannot recur — the recurring root cause was the name being hand-maintained in several independent places that drifted: TestUSDCDomainSeparatorsMatchOnChain pins each chain to its captured on-chain DOMAIN_SEPARATOR (via the same apitypes path the signer uses); TestCatalogUSDCMatchesVerifierChain pins the catalog renderer and the x402 registry to each other. Surfaced by flows/p2p-surface-smoke.sh against a live x402-rs facilitator. --- .../usdc_domain_consistency_test.go | 42 ++++++++++ internal/x402/chains.go | 19 +++-- internal/x402/chains_domain_test.go | 80 +++++++++++++++++++ 3 files changed, 135 insertions(+), 6 deletions(-) create mode 100644 internal/serviceoffercontroller/usdc_domain_consistency_test.go create mode 100644 internal/x402/chains_domain_test.go diff --git a/internal/serviceoffercontroller/usdc_domain_consistency_test.go b/internal/serviceoffercontroller/usdc_domain_consistency_test.go new file mode 100644 index 00000000..46286e3b --- /dev/null +++ b/internal/serviceoffercontroller/usdc_domain_consistency_test.go @@ -0,0 +1,42 @@ +package serviceoffercontroller + +import ( + "testing" + + "github.com/ObolNetwork/obol-stack/internal/x402" +) + +// TestCatalogUSDCMatchesVerifierChain guards against the EIP-712 USDC domain +// name (and version) drifting between the TWO independent Go sources that must +// agree: the catalog renderer's defaultUSDCForNetwork (what /api/services.json +// advertises) and x402's chain registry (what the 402 advertises and the buyer +// signs under). They disagreed once — chains.go said "USD Coin" for base-sepolia +// while the catalog already said the correct "USDC" — which silently broke +// host-side EIP-3009 signatures against a real facilitator and kept recurring +// because each source was hand-maintained. 
+// +// x402's TestUSDCDomainSeparatorsMatchOnChain pins the registry to the on-chain +// value; this test pins the catalog and the registry to EACH OTHER, so a future +// edit to one without the other fails offline at `go test`. +func TestCatalogUSDCMatchesVerifierChain(t *testing.T) { + for _, net := range []string{"base", "base-sepolia", "ethereum"} { + t.Run(net, func(t *testing.T) { + cat, ok := defaultUSDCForNetwork(net) + if !ok || cat.EIP712Domain == nil { + t.Fatalf("catalog has no USDC EIP-712 domain for %q", net) + } + ci, err := x402.ResolveChainInfo(net) + if err != nil { + t.Fatalf("x402.ResolveChainInfo(%q): %v", net, err) + } + if cat.EIP712Domain.Name != ci.EIP3009Name { + t.Errorf("%s EIP-712 name drift: catalog=%q vs verifier=%q — both must equal the on-chain token domain (base-sepolia is \"USDC\", mainnet is \"USD Coin\")", + net, cat.EIP712Domain.Name, ci.EIP3009Name) + } + if cat.EIP712Domain.Version != ci.EIP3009Version { + t.Errorf("%s EIP-712 version drift: catalog=%q vs verifier=%q", + net, cat.EIP712Domain.Version, ci.EIP3009Version) + } + }) + } +} diff --git a/internal/x402/chains.go b/internal/x402/chains.go index c6d508da..92269a50 100644 --- a/internal/x402/chains.go +++ b/internal/x402/chains.go @@ -66,12 +66,19 @@ var ( } ChainBaseSepolia = ChainInfo{ - Name: "base-sepolia", - NetworkID: "base-sepolia", - CAIP2Network: "eip155:84532", - USDCAddress: "0x036CbD53842c5426634e7929541eC2318f3dCF7e", - Decimals: 6, - EIP3009Name: "USD Coin", + Name: "base-sepolia", + NetworkID: "base-sepolia", + CAIP2Network: "eip155:84532", + USDCAddress: "0x036CbD53842c5426634e7929541eC2318f3dCF7e", + Decimals: 6, + // Base-Sepolia USDC is FiatTokenV2_2 whose EIP-712 domain name is "USDC" + // (verified: on-chain DOMAIN_SEPARATOR() == keccak of the domain built + // with "USDC"), NOT the mainnet "USD Coin". 
Advertising "USD Coin" makes + // every EIP-3009 signature fail FiatToken's SignatureChecker against a + // REAL facilitator — the recurring base-sepolia "name" bug that the stub + // facilitator silently masked. TestUSDCDomainSeparatorsMatchOnChain pins + // this so it can never regress. + EIP3009Name: "USDC", EIP3009Version: "2", } diff --git a/internal/x402/chains_domain_test.go b/internal/x402/chains_domain_test.go new file mode 100644 index 00000000..af45d232 --- /dev/null +++ b/internal/x402/chains_domain_test.go @@ -0,0 +1,80 @@ +package x402 + +import ( + "testing" + + gethmath "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/signer/core/apitypes" +) + +// goldenUSDCDomainSeparators pins each chain's USDC EIP-712 DOMAIN_SEPARATOR as +// read from the live token contract: +// +// cast call "DOMAIN_SEPARATOR()(bytes32)" --rpc-url +// +// The domain separator is a deterministic function of the four fields a buyer +// signs under — (name, version, chainId, verifyingContract). Pinning it turns +// the recurring base-sepolia "USD Coin" vs "USDC" EIP-712 *name* bug into an +// OFFLINE `go test` failure: a wrong name yields a different separator, so an +// EIP-3009 signature built from this registry would be rejected by a real +// facilitator (FiatToken's SignatureChecker). The bug bit ~repeatedly because +// nothing tied the hand-maintained name string to the on-chain domain; this +// closes that loop. Capture and add a chain's value here as you verify it. +var goldenUSDCDomainSeparators = []struct { + name string + chain ChainInfo + golden string +}{ + // Base-Sepolia USDC is FiatTokenV2_2 — domain name "USDC", NOT "USD Coin". 
+ {"base-sepolia", ChainBaseSepolia, "0x71f17a3b2ff373b803d70a5a07c046c1a2bc8e89c09ef722fcb047abe94c9818"}, +} + +func TestUSDCDomainSeparatorsMatchOnChain(t *testing.T) { + for _, tc := range goldenUSDCDomainSeparators { + t.Run(tc.name, func(t *testing.T) { + got, err := usdcDomainSeparator(tc.chain) + if err != nil { + t.Fatalf("compute domain separator: %v", err) + } + if got != tc.golden { + t.Errorf("%s USDC EIP-712 domain separator = %s, want on-chain %s\n"+ + " registry has EIP3009Name=%q version=%q addr=%s — the name almost certainly\n"+ + " disagrees with the on-chain token domain (base-sepolia FiatTokenV2_2 is \"USDC\",\n"+ + " mainnet USDC is \"USD Coin\"). A real facilitator will reject signatures built here.", + tc.name, got, tc.golden, tc.chain.EIP3009Name, tc.chain.EIP3009Version, tc.chain.USDCAddress) + } + }) + } +} + +// usdcDomainSeparator computes the EIP-712 domain separator a buyer signs under +// for ci's USDC via the SAME apitypes path SignExactPayment uses, so this guards +// the exact value that reaches a facilitator — not a re-derivation that could +// drift from the signer. 
+func usdcDomainSeparator(ci ChainInfo) (string, error) { + chainID, err := chainIDFromNetwork(ci.CAIP2Network) + if err != nil { + return "", err + } + td := apitypes.TypedData{ + Types: apitypes.Types{ + "EIP712Domain": { + {Name: "name", Type: "string"}, + {Name: "version", Type: "string"}, + {Name: "chainId", Type: "uint256"}, + {Name: "verifyingContract", Type: "address"}, + }, + }, + Domain: apitypes.TypedDataDomain{ + Name: ci.EIP3009Name, + Version: ci.EIP3009Version, + ChainId: gethmath.NewHexOrDecimal256(chainID), + VerifyingContract: ci.USDCAddress, + }, + } + sep, err := td.HashStruct("EIP712Domain", td.Domain.Map()) + if err != nil { + return "", err + } + return sep.String(), nil +} From e496caaa7712ba615f014d329b4af599707ce282 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:36:16 +0400 Subject: [PATCH 09/11] =?UTF-8?q?test(flows):=20host-side=20P2P=20surface?= =?UTF-8?q?=20smoke=20=E2=80=94=20settlement,=20paid=20dataset=20join,=20r?= =?UTF-8?q?esearch,=20--secure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit flows/p2p-surface-smoke.sh covers the host-P2P gaps release-smoke never touches: direct-P2P inference 402 + remote-model proxy, paid dataset /join/paid, research membership->submit->payout, on-chain settlement (1d/2e) via a local facilitator, and the --secure transport gate (named-tunnel 4a, tailnet 4b/4c). Adds 'obol sell data publish --facilitator' and the cloudflared 2026.6.0 bump. 13 PASS / 2 SKIP with facilitator + tunnel up. 
--- cmd/obol/dataset.go | 3 +- flows/README.md | 27 ++ flows/p2p-surface-smoke.sh | 431 ++++++++++++++++++ flows/tools/x402-sign/main.go | 77 ++++ .../infrastructure/cloudflared/values.yaml | 2 +- 5 files changed, 538 insertions(+), 2 deletions(-) create mode 100755 flows/p2p-surface-smoke.sh create mode 100644 flows/tools/x402-sign/main.go diff --git a/cmd/obol/dataset.go b/cmd/obol/dataset.go index d17d149f..e4f090f9 100644 --- a/cmd/obol/dataset.go +++ b/cmd/obol/dataset.go @@ -162,6 +162,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { &cli.StringFlag{Name: "price", Usage: "Per-join price in USDC (enables x402 paid join; empty = invite/open only)"}, &cli.StringFlag{Name: "pay-to", Usage: "USDC recipient (default: the dataset owner address)"}, &cli.StringFlag{Name: "chain", Usage: "Payment chain", Value: "base-sepolia"}, + &cli.StringFlag{Name: "facilitator", Usage: "x402 facilitator URL for paid-join verify+settle (point at a local anvil fork for tests)", Value: x402.DefaultFacilitatorURL}, }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) @@ -220,7 +221,7 @@ func datasetPublishCommand(cfg *config.Config) *cli.Command { req := x402.BuildV2Requirement(chain, price, payTo, 0) joinAtomic = req.Amount paidJoin = x402.NewForwardAuthMiddleware(x402.ForwardAuthConfig{ - FacilitatorURL: x402.DefaultFacilitatorURL, + FacilitatorURL: cmd.String("facilitator"), VerifyOnly: false, SettlesInProcess: true, }, []x402types.PaymentRequirements{req}) diff --git a/flows/README.md b/flows/README.md index 54953b44..cc73a717 100644 --- a/flows/README.md +++ b/flows/README.md @@ -29,6 +29,33 @@ holds small structured helpers used by the shell entrypoints. Keep new contract rather than duplicating stack, DNS, wallet, or config-mutation helpers. `release-smoke.sh` is the release gate. 
+`hf-surface-smoke.sh` and `p2p-surface-smoke.sh` are out-of-band, host-side +"surface" smokes — no cluster required, and each check SKIPs on a missing +prerequisite rather than aborting. They cover the peer-to-peer / host-gateway +paths release-smoke (entirely the cluster path) does not: + +- `hf-surface-smoke.sh` — dataset hub (anonymize → sign → publish → unpaid buy), + fine-tune-on-spark provenance binding, router + ERC-8004 indexer discovery. +- `p2p-surface-smoke.sh` — standalone `obol sell inference` 402 emission + + remote-model proxy (model served on `spark1`, reached via SSH forward); the + paid dataset `/join/paid` x402 gate + `buy dataset --join` client guards + (402 challenge, `--max-price` cap, fail-closed download); and the research + membership → submit → payout E2E asserting **token-derived** worker identity + (not the self-declared field) and best-per-worker payouts. When a local anvil + base-sepolia fork + x402 facilitator are reachable (auto-detected; stand them + up with flow-10), the **paid 200** (a signed EIP-3009 `X-PAYMENT` verified + + settled, via the `flows/tools/x402-sign` host signer) and the **paid dataset + mint + verified download** settle on chain for real; otherwise those two legs + SKIP. The `--secure` transport gate (Surface 4) auto-activates when a genuinely + non-secure origin exists: **4a** (ACCEPT over a NAMED cloudflared tunnel) when + `SECURE_TUNNEL_NAME` + `SECURE_TUNNEL_HOSTNAME` are set (after `cloudflared + tunnel login` + routing a hostname on your CF domain); **4b/4c** (REJECT/ACCEPT + a CGNAT plaintext origin) when THIS host is on a tailnet (`tailscale up`) so a + remote peer (`SECURE_ORIGIN_SSH`, default `spark1`) can reach a non-private mac + IP — loopback/RFC1918 are always "secure". Each leg SKIPs precisely until its + prereq is set; the gate is also unit-tested. Run e.g. + `OBOL_BIN=.workspace/bin/obol SPARK=spark1 bash flows/p2p-surface-smoke.sh`. 
+ ## Running a flow detached over SSH `nohup` and `setsid -f` get reaped when an SSH session ending closes the diff --git a/flows/p2p-surface-smoke.sh b/flows/p2p-surface-smoke.sh new file mode 100755 index 00000000..594309f9 --- /dev/null +++ b/flows/p2p-surface-smoke.sh @@ -0,0 +1,431 @@ +#!/usr/bin/env bash +# P2P-surface smoke — fills the smoke-coverage GAPS the flows report flagged for +# the peer-to-peer / host-gateway paths that neither release-smoke.sh (entirely +# the cluster path) nor hf-surface-smoke.sh (unpaid dataset only) cover: +# +# 1. Direct-P2P inference standalone `obol sell inference` gateway emits a +# valid x402 402 and is wired to a REMOTE model +# (spark1, over an SSH forward) — the no-cluster +# seller path that nothing smoked before. +# 2. Paid dataset join `obol sell data publish --price` x402 /join/paid +# gate + `obol buy dataset --join` client: the 402 +# paid-join challenge, the --max-price cap (rejects +# before signing), and fail-closed download. +# 3. Research E2E `obol research publish` membership → submit → +# payout, asserting worker identity is TOKEN-derived +# (not the self-declared field) and payouts are +# best-per-worker (the H4 fixes), end to end. +# +# Each section SKIPs on a missing prerequisite, never aborts (hf-surface style). +# +# Settlement & transport coverage: +# - Full on-chain x402 SETTLEMENT (1d paid 200, 2e paid mint+download) runs for +# REAL when a local anvil base-sepolia fork (to fund test wallets) AND an x402 +# facilitator are reachable (stand them up with flow-10). Otherwise those two +# legs SKIP; the 402 emission, client --max-price cap, and fail-closed checks +# need no facilitator and always run. +# - The `--secure` transport gate (Surface 4): exercised E2E when a genuinely +# non-secure origin exists. 
4a (ACCEPT over a NAMED cloudflared tunnel) needs +# named-tunnel creds + SECURE_TUNNEL_NAME/SECURE_TUNNEL_HOSTNAME; 4b/4c +# (REJECT/ACCEPT a CGNAT plaintext origin) need THIS host on a tailnet so a +# remote peer can reach a non-private mac IP — requestIsSecure() treats +# loopback AND every RFC1918 origin as secure. Each leg auto-activates when +# its prereq is present and SKIPs precisely otherwise; the gate is also +# unit-tested (internal/x402/forwardauth_test.go). +# +# --secure activation env: +# SECURE_TUNNEL_NAME / SECURE_TUNNEL_HOSTNAME named cloudflared tunnel (4a) +# SECURE_ORIGIN_SSH tailnet peer that originates the plaintext probe (4b/4c; +# default: $SPARK). Requires `tailscale up` on THIS host. +# +# Overridable env: +# OBOL_BIN built obol (default: build from this tree) +# SPARK ssh host serving the inference upstream (default: spark1; "" skips) +# SPARK_PORT local port SSH-forwarded to spark's ollama (default: 11435) +# SPARK_MODEL model to serve from spark (default: qwen3:0.6b) +# FACILITATOR x402 facilitator URL for settlement (default: auto-detect :4040) +# ANVIL_RPC base-sepolia fork RPC for funding test wallets (default: :8545) +set -uo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +WORK="$(mktemp -d)" +OBOL_BIN="${OBOL_BIN:-$WORK/obol}" +SPARK="${SPARK-spark1}" +SPARK_PORT="${SPARK_PORT:-11435}" +SPARK_MODEL="${SPARK_MODEL:-qwen3:0.6b}" +FACILITATOR="${FACILITATOR:-}" +SKILLS="$ROOT/internal/embed/skills" + +INF_PORT=18402 +DS_PORT=18951 +RES_PORT=18981 +SPARK_FWD_PID="" + +declare -a RESULTS +pass() { RESULTS+=("PASS $1"); echo " ✓ $1"; } +skip() { RESULTS+=("SKIP $1 — $2"); echo " - SKIP $1 — $2"; } +fail() { RESULTS+=("FAIL $1 — $2"); echo " ✗ FAIL $1 — $2"; } +section() { echo; echo "=== $1 ==="; } + +cleanup() { + jobs -p | xargs -r kill 2>/dev/null + [ -n "$SPARK_FWD_PID" ] && kill "$SPARK_FWD_PID" 2>/dev/null + rm -rf "$WORK" +} +trap cleanup EXIT + +# --- build obol if needed --- +if [ ! 
-x "$OBOL_BIN" ]; then + echo "Building obol …" + ( cd "$ROOT" && go build -o "$OBOL_BIN" ./cmd/obol ) || { echo "obol build failed"; exit 1; } +fi +# A FRESH config dir with no kubeconfig keeps `sell inference` in standalone +# (no-cluster) mode — exactly the direct-P2P path we want to smoke. +export OBOL_CONFIG_DIR="$WORK/config" +mkdir -p "$OBOL_CONFIG_DIR" + +# Free our ports from any orphan left by a prior aborted run. +for p in "$INF_PORT" "$DS_PORT" "$RES_PORT"; do + lsof -nP -iTCP:"$p" -sTCP:LISTEN -t 2>/dev/null | xargs -r kill 2>/dev/null +done + +# --- optional settlement prerequisites (local anvil base-sepolia fork + x402 +# facilitator). When both are reachable, 1d/2e settle on chain for real; else +# they SKIP. Reuses flow-10's infra — stand it up with: +# bash flows/flow-10-anvil-facilitator.sh +ANVIL_RPC="${ANVIL_RPC:-http://127.0.0.1:8545}" +USDC_ADDR="${USDC_ADDR:-0x036CbD53842c5426634e7929541eC2318f3dCF7e}" +SETTLE=0 +XSIGN="$WORK/x402-sign" +if command -v cast >/dev/null 2>&1 \ + && [ "$(curl -s --max-time 4 "$ANVIL_RPC" -X POST -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' 2>/dev/null \ + | jq -r '.result // empty' 2>/dev/null)" = "0x14a34" ]; then + if [ -z "$FACILITATOR" ] && curl -sf --max-time 4 http://127.0.0.1:4040/supported >/dev/null 2>&1; then + FACILITATOR="http://127.0.0.1:4040" + fi + if [ -n "$FACILITATOR" ] && curl -sf --max-time 4 "$FACILITATOR/supported" >/dev/null 2>&1; then + # Host-side raw X-PAYMENT signer — no buyer CLI exists for `sell inference`. 
+ ( cd "$ROOT" && go build -o "$XSIGN" ./flows/tools/x402-sign ) >/dev/null 2>&1 \ + && [ -x "$XSIGN" ] && SETTLE=1 + fi +fi + +fund_usdc() { # $1=address — mint 1000 USDC to its FiatToken balanceOf slot (idx 9) + local slot; slot=$(cast index address "$1" 9 2>/dev/null) + cast rpc anvil_setStorageAt "$USDC_ADDR" "$slot" \ + "0x000000000000000000000000000000000000000000000000000000003B9ACA00" \ + --rpc-url "$ANVIL_RPC" >/dev/null 2>&1 +} +sync_anvil_clock() { # avoid 'authorization not yet valid' on a long-lived fork (pitfall 18) + cast rpc evm_setNextBlockTimestamp "$(date +%s)" --rpc-url "$ANVIL_RPC" >/dev/null 2>&1 || true + cast rpc anvil_mine 1 --rpc-url "$ANVIL_RPC" >/dev/null 2>&1 || true +} + +# =========================================================================== +section "Surface 1 — Direct-P2P inference (standalone gateway → remote model)" +# =========================================================================== +UPSTREAM="http://127.0.0.1:$SPARK_PORT" +SPARK_OK=0 +if [ -z "$SPARK" ]; then + skip "1 spark upstream" "SPARK unset" +elif ! ssh -o BatchMode=yes -o ConnectTimeout=6 "$SPARK" 'command -v ollama >/dev/null' >/dev/null 2>&1; then + skip "1 spark upstream" "$SPARK unreachable or no ollama" +else + # Ensure the model is served on the remote box, then forward it locally so the + # seller's gateway proxies to a model running on ANOTHER machine (real P2P). 
+ ssh "$SPARK" "pgrep -x ollama >/dev/null || (OLLAMA_HOST=0.0.0.0:11434 nohup ollama serve >/tmp/ollama.log 2>&1 &); OLLAMA_HOST=0.0.0.0:11434 ollama pull $SPARK_MODEL >/dev/null 2>&1" >/dev/null 2>&1 + lsof -nP -iTCP:"$SPARK_PORT" -sTCP:LISTEN -t 2>/dev/null | xargs -r kill 2>/dev/null + ssh -fN -o ConnectTimeout=8 -o ExitOnForwardFailure=yes -L "$SPARK_PORT:127.0.0.1:11434" "$SPARK" \ + && SPARK_FWD_PID=$(pgrep -f "$SPARK_PORT:127.0.0.1:11434" | head -1) + sleep 1 + if curl -sf --max-time 10 "$UPSTREAM/api/tags" >/dev/null 2>&1; then + pass "1a spark upstream reachable — model on $SPARK served via SSH forward :$SPARK_PORT" + SPARK_OK=1 + else + skip "1a spark upstream" "forward to $SPARK:11434 not reachable" + UPSTREAM="http://127.0.0.1:11434" # fall back to host ollama for the gate test + fi +fi + +PAY_TO="0x1111111111111111111111111111111111111111" +"$OBOL_BIN" sell inference p2p-smoke --model "$SPARK_MODEL" --pay-to "$PAY_TO" \ + --price 0.001 --chain base-sepolia --upstream "$UPSTREAM" --listen "127.0.0.1:$INF_PORT" \ + ${FACILITATOR:+--facilitator "$FACILITATOR"} >"$WORK/inf-gw.log" 2>&1 & +# Wait for the gateway to listen. 
+for _ in $(seq 1 25); do curl -s -o /dev/null "http://127.0.0.1:$INF_PORT/v1/chat/completions" && break; sleep 0.5; done + +CODE=$(curl -s -o "$WORK/inf-402.json" -w '%{http_code}' --max-time 8 \ + "http://127.0.0.1:$INF_PORT/v1/chat/completions" -H 'Content-Type: application/json' \ + -d "{\"model\":\"$SPARK_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}") +if [ "$CODE" = "402" ] && jq -e '.accepts[0] | select(.scheme=="exact" and .payTo=="'"$PAY_TO"'" and .amount=="1000")' "$WORK/inf-402.json" >/dev/null 2>&1; then + pass "1b standalone gateway emits x402 402 (exact / base-sepolia USDC / amount 1000 / payTo)" +else + fail "1b inference 402" "expected 402 + accepts[exact,payTo,amount], got HTTP $CODE: $(head -c 160 "$WORK/inf-402.json")" +fi + +if [ "$SPARK_OK" = 1 ]; then + RESP=$(curl -s --max-time 60 "$UPSTREAM/v1/chat/completions" -H 'Content-Type: application/json' \ + -d "{\"model\":\"$SPARK_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"max_tokens\":64,\"stream\":false}") + if echo "$RESP" | jq -e '.choices[0].message' >/dev/null 2>&1; then + pass "1c remote model on $SPARK serves a valid completion (the resource the gateway proxies)" + else + fail "1c spark inference" "no valid completion: $(echo "$RESP" | head -c 120)" + fi +fi + +if [ "$SETTLE" = 1 ] && jq -e '.accepts[0].asset and .accepts[0].extra.name' "$WORK/inf-402.json" >/dev/null 2>&1; then + NEW=$(cast wallet new 2>/dev/null) + BUYER=$(echo "$NEW" | awk '/Address/{print $2}'); BKEY=$(echo "$NEW" | awk '/Private key/{print $3}') + fund_usdc "$BUYER"; sync_anvil_clock + XPAY=$(jq -c '.' 
"$WORK/inf-402.json" | X402_SIGN_KEY="$BKEY" "$XSIGN" 2>/dev/null) + PCODE=$(curl -s -o "$WORK/inf-paid.json" -w '%{http_code}' --max-time 90 \ + "http://127.0.0.1:$INF_PORT/v1/chat/completions" -H 'Content-Type: application/json' \ + -H "X-PAYMENT: $XPAY" \ + -d "{\"model\":\"$SPARK_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"one word hi\"}],\"max_tokens\":16,\"stream\":false}") + if [ "$PCODE" = "200" ] && jq -e '.choices[0]' "$WORK/inf-paid.json" >/dev/null 2>&1; then + pass "1d paid 200 — fresh EOA signs EIP-3009 X-PAYMENT, gateway verifies+settles via facilitator, proxies upstream" + else + fail "1d paid settlement" "expected 200 + choices, got HTTP $PCODE: $(head -c 160 "$WORK/inf-paid.json")" + fi +else + skip "1d paid 200 (full settlement)" "needs a local anvil base-sepolia fork + x402 facilitator (flow-10); 402/proxy covered above" +fi + +# =========================================================================== +section "Surface 2 — Paid dataset join (x402 /join/paid gate + buy --join)" +# =========================================================================== +DS_ID="p2p-ds" +BUNDLE="$WORK/ds"; mkdir -p "$BUNDLE" +printf '{"messages":[{"role":"user","content":"q"},{"role":"assistant","content":"a"}]}\n' > "$BUNDLE/sft.jsonl" +HASH=$(shasum -a 256 "$BUNDLE/sft.jsonl" | awk '{print $1}') +printf '{"hash":"%s","files":["sft.jsonl"]}\n' "$HASH" > "$BUNDLE/manifest.json" + +if "$OBOL_BIN" sell data from "$BUNDLE" --name "$DS_ID" >/dev/null 2>&1 \ + && "$OBOL_BIN" sell data verify "$DS_ID" 2>&1 | grep -q 'Chain valid'; then + pass "2a sign + verify — signed version chain valid" +else + fail "2a sign+verify" "version not recorded or chain invalid" +fi + +# Publish WITH a per-join price → enables the x402 /join/paid gate. 
+"$OBOL_BIN" sell data publish "$DS_ID" --membership open --port "$DS_PORT" --no-tunnel \ + --price 0.001 --pay-to "$PAY_TO" --chain base-sepolia \ + ${FACILITATOR:+--facilitator "$FACILITATOR"} >"$WORK/ds-pub.log" 2>&1 & +curl -sf --retry 25 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$DS_PORT/healthz" >/dev/null 2>&1 +DS_URL="http://127.0.0.1:$DS_PORT" + +CODE=$(curl -s -o "$WORK/join-402.json" -w '%{http_code}' --max-time 8 -X POST "$DS_URL/dataset/$DS_ID/join/paid") +if [ "$CODE" = "402" ] && jq -e '.accepts[0].scheme=="exact"' "$WORK/join-402.json" >/dev/null 2>&1; then + pass "2b /join/paid emits an x402 402 paid-join challenge" +else + fail "2b paid-join 402" "expected 402 + accepts, got HTTP $CODE: $(head -c 160 "$WORK/join-402.json")" +fi + +# --max-price below the advertised price must reject BEFORE signing (no chain). +OUT=$("$OBOL_BIN" buy dataset "$DS_URL" --id "$DS_ID" --join --max-price 1 --out "$WORK/never.jsonl" 2>&1) +if echo "$OUT" | grep -qiE 'exceeds .*max-price|max-price'; then + pass "2c buy --join --max-price caps the join price (rejects before signing)" +else + fail "2c max-price cap" "expected a max-price rejection, got: $(echo "$OUT" | head -c 160)" +fi + +# Fail-closed: a download without a member token must be refused. +CODE=$(curl -s -o /dev/null -w '%{http_code}' --max-time 8 "$DS_URL/dataset/$DS_ID/download?version=1") +if [ "$CODE" = "401" ] || [ "$CODE" = "403" ]; then + pass "2d download fails closed without a member token (HTTP $CODE)" +else + fail "2d fail-closed" "expected 401/403, got HTTP $CODE" +fi + +if [ "$SETTLE" = 1 ]; then + # Two-pass: pass 1 surfaces+creates the buyer wallet (unfunded → 503); fund it + # on the fork, then pass 2 settles, mints a version-scoped token, downloads. 
+ OWNER=$("$OBOL_BIN" sell data verify "$DS_ID" 2>/dev/null | grep -oiE '0x[0-9a-fA-F]{40}' | head -1) + P1=$("$OBOL_BIN" buy dataset "$DS_URL" --id "$DS_ID" --join --out "$WORK/paid1.jsonl" 2>&1) + BUYER=$(echo "$P1" | grep -oiE '0x[0-9a-fA-F]{40}' | head -1) + fund_usdc "$BUYER"; sync_anvil_clock + P2=$("$OBOL_BIN" buy dataset "$DS_URL" --id "$DS_ID" --join ${OWNER:+--owner "$OWNER"} --out "$WORK/paid2.jsonl" 2>&1) + if echo "$P2" | grep -qi 'minted' && [ -s "$WORK/paid2.jsonl" ]; then + pass "2e paid join settles — minted a version-scoped token + verified signed-log download ($(wc -c <"$WORK/paid2.jsonl" | tr -d ' ') bytes)" + else + fail "2e paid mint+download" "settlement failed: $(echo "$P2" | tail -1 | head -c 160)" + fi +else + skip "2e paid mint+download (full settlement)" "needs a local anvil base-sepolia fork + x402 facilitator (flow-10)" +fi + +# =========================================================================== +section "Surface 3 — Research E2E (membership → submit → payout, token identity)" +# =========================================================================== +# Inline stdlib worker: device-auth join (open mode auto-approves), submit a +# metric, print the member token. Exercises the real server endpoints. 
+cat > "$WORK/rworker.py" <<'PY' +import json,sys,time,urllib.request +def call(url,token,body=None): + m="POST" if body is not None else "GET" + req=urllib.request.Request(url,data=(json.dumps(body).encode() if body is not None else None),method=m) + req.add_header("Content-Type","application/json") + if token: req.add_header("Authorization","Bearer "+token) + return json.loads(urllib.request.urlopen(req,timeout=30).read()) +kb,worker,value=sys.argv[1].rstrip("/"),sys.argv[2],float(sys.argv[3]) +g=call(kb+"/auth/device/code",None,{"worker":worker}) +tok=None +for _ in range(30): + r=call(kb+"/auth/device/token",None,{"device_code":g["device_code"]}) + if r.get("status")=="authorized": tok=r["token"]; break + time.sleep(1) +if not tok: sys.exit("join timed out") +call(kb+"/results",tok,{"worker":worker,"value":value,"output":"p2p-smoke"}) +print(tok) +PY + +"$OBOL_BIN" research publish demo-prog --metric val_bpb --direction minimize \ + --accept threshold --threshold 1.0 --baseline 1.0 --pool 100 --token OBOL \ + --membership open --no-tunnel --port "$RES_PORT" >"$WORK/res-pub.log" 2>&1 & +if curl -sf --retry 25 --retry-connrefused --retry-delay 1 "http://127.0.0.1:$RES_PORT/healthz" >/dev/null 2>&1; then + RES_URL="http://127.0.0.1:$RES_PORT" + # Worker A (value 0.50) and worker B (value 0.40) submit under the SAME + # self-declared name "spoof" — if identity were the self-declared field they + # would collapse to one worker. spark computes B's metric when reachable. 
+ BVAL=0.40 + if [ -n "$SPARK" ] && ssh -o BatchMode=yes -o ConnectTimeout=6 "$SPARK" 'echo ok' >/dev/null 2>&1; then + BVAL=$(ssh "$SPARK" 'echo 0.40' 2>/dev/null || echo 0.40) + fi + TOKA=$(python3 "$WORK/rworker.py" "$RES_URL" spoof 0.50 2>/dev/null) + TOKB=$(python3 "$WORK/rworker.py" "$RES_URL" spoof "$BVAL" 2>/dev/null) + STATUS=$(curl -s --max-time 8 -H "Authorization: Bearer $TOKA" "$RES_URL/status" 2>/dev/null) + echo "$STATUS" > "$WORK/res-status.json" + + if [ -n "$TOKA" ] && [ -n "$TOKB" ] && [ "$TOKA" != "$TOKB" ]; then + pass "3a two workers admitted (open membership), distinct member tokens minted" + else + fail "3a membership" "join failed (tokA=$TOKA tokB=$TOKB)" + fi + + python3 - "$WORK/res-status.json" <<'PY' && pass "3b token-derived identity + best-per-worker payouts + champion" || fail "3b payout/identity" "see status json" +import json,sys +d=json.load(open(sys.argv[1])) +results=d.get("results") or [] +workers={r.get("worker") for r in results} +champ=(d.get("champion") or {}).get("value") +payouts=d.get("payouts") or {} +# two DISTINCT token-derived worker ids despite the shared self-declared name +assert len(workers)>=2, ("identity collapsed to self-declared name", workers) +assert all(str(w).startswith("w-") for w in workers), ("identity not token-derived", workers) +# champion is the better (lower) value +assert abs(float(champ)-0.40)<1e-6, ("champion not best", champ) +# payouts split across both workers, best-per-worker, summing to the pool +vals=[v for v in payouts.values() if isinstance(v,(int,float))] +assert len(payouts)>=2 and all(v>0 for v in vals) and abs(sum(vals)-100.0)<1.0, ("payout split wrong", payouts) +print("OK", "workers=",len(workers),"champ=",champ,"payouts=",payouts) +PY +else + skip "3 research" "research publish did not become healthy" +fi + +# A second, CLI-surface smoke of the operator view. 
+if "$OBOL_BIN" research status demo-prog >/dev/null 2>&1; then + pass "3c obol research status (operator CLI) returns the program state" +else + skip "3c research status CLI" "status command unavailable" +fi + +# =========================================================================== +section "Surface 4 — --secure transport gate (needs a real non-secure origin)" +# =========================================================================== +# requestIsSecure() (internal/x402/forwardauth.go) treats loopback AND every +# RFC1918-private origin as secure, so exercising the gate E2E needs a genuinely +# non-secure peer. Each leg auto-activates when its prereq is present and SKIPs +# precisely otherwise (this host has neither today: not on a tailnet, no +# cloudflared named-tunnel creds). +SEC_PORT=18403 +SEC_PID="" +SEC_BODY="{\"model\":\"$SPARK_MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}" +# Cloudflare WAF on some zones (e.g. v1337.org) 403s default curl/urllib UAs +# (managed-challenge / error 1010); send a browser UA so 4a reaches the origin. +SEC_UA="${SECURE_TUNNEL_UA:-Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36}" +secure_gw() { # $1 = extra flags (e.g. --secure); binds 0.0.0.0 so a tailnet peer reaches it + [ -n "$SEC_PID" ] && kill "$SEC_PID" 2>/dev/null; SEC_PID="" + lsof -nP -iTCP:"$SEC_PORT" -sTCP:LISTEN -t 2>/dev/null | xargs -r kill 2>/dev/null + # shellcheck disable=SC2086 + "$OBOL_BIN" sell inference p2p-secure --model "$SPARK_MODEL" --pay-to "$PAY_TO" --price 0.001 \ + --chain base-sepolia --upstream "$UPSTREAM" --listen "0.0.0.0:$SEC_PORT" $1 \ + >"$WORK/sec-gw.log" 2>&1 & + SEC_PID=$! + for _ in $(seq 1 25); do curl -s -o /dev/null "http://127.0.0.1:$SEC_PORT/v1/chat/completions" && return 0; sleep 0.5; done + return 1 +} + +# --- 4a — ACCEPT over a NAMED HTTPS tunnel (X-Forwarded-Proto: https) --- +if ! 
command -v cloudflared >/dev/null 2>&1; then + skip "4a --secure ACCEPTS over an HTTPS tunnel" "cloudflared not installed" +elif [ -z "${SECURE_TUNNEL_HOSTNAME:-}" ] || [ -z "${SECURE_TUNNEL_NAME:-}" ] || [ ! -f "$HOME/.cloudflared/cert.pem" ]; then + skip "4a --secure ACCEPTS over an HTTPS tunnel" "no named-tunnel creds — run 'cloudflared tunnel login', route a hostname, then set SECURE_TUNNEL_NAME + SECURE_TUNNEL_HOSTNAME (quick tunnels not used: account-less edge is unreliable)" +elif ! secure_gw "--secure"; then + fail "4a --secure gateway" "gateway did not start on :$SEC_PORT" +else + # NB: --no-autoupdate is NOT a `tunnel run` flag (only valid for the quick + # `tunnel --url` form); passing it here makes cloudflared print usage and never + # connect → the hostname 530s. Omit it (no autoupdate fires in a ~60s test). + cloudflared tunnel run --url "http://127.0.0.1:$SEC_PORT" "$SECURE_TUNNEL_NAME" >"$WORK/cf.log" 2>&1 & + for _ in $(seq 1 20); do grep -qiE 'Registered tunnel connection' "$WORK/cf.log" && break; sleep 1; done + SC=000 + for _ in $(seq 1 25); do + SC=$(curl -s -o /dev/null -w '%{http_code}' --max-time 12 -A "$SEC_UA" "https://$SECURE_TUNNEL_HOSTNAME/v1/chat/completions" \ + -H 'Content-Type: application/json' -d "$SEC_BODY") + [ "$SC" = "402" ] && break; sleep 2 + done + case "$SC" in + 402) pass "4a --secure ACCEPTS an HTTPS-tunneled request (X-Forwarded-Proto: https → 402, not 400)" ;; + 400) fail "4a --secure over tunnel" "gate REJECTED an HTTPS-forwarded request (400) — secure-transport detection broken" ;; + *) skip "4a --secure over tunnel" "named tunnel $SECURE_TUNNEL_HOSTNAME did not serve (HTTP $SC) — check the tunnel run + DNS route" ;; + esac + jobs -p | xargs -r kill 2>/dev/null; SEC_PID="" +fi + +# --- 4b/4c — REJECT/ACCEPT a real non-private plaintext origin from a tailnet peer --- +MAC_TS_IP="" +command -v tailscale >/dev/null 2>&1 && MAC_TS_IP=$(tailscale ip -4 2>/dev/null | head -1) +[ -z "$MAC_TS_IP" ] && [ -x 
"/Applications/Tailscale.app/Contents/MacOS/Tailscale" ] \ + && MAC_TS_IP=$("/Applications/Tailscale.app/Contents/MacOS/Tailscale" ip -4 2>/dev/null | head -1) +ORIGIN_SSH="${SECURE_ORIGIN_SSH:-$SPARK}" +remote_code() { # HTTP code a remote tailnet peer sees hitting mac-tailnet-ip:$SEC_PORT in plaintext + ssh -o BatchMode=yes -o ConnectTimeout=8 "$ORIGIN_SSH" \ + "curl -s -o /dev/null -w '%{http_code}' --max-time 8 http://$MAC_TS_IP:$SEC_PORT/v1/chat/completions -H 'Content-Type: application/json' -d '$SEC_BODY'" 2>/dev/null || echo 000 +} +if [ -z "$MAC_TS_IP" ]; then + skip "4b --secure REJECTS a non-secure (CGNAT) plaintext origin" "this host is not on a tailnet — 'tailscale up' so a remote peer can reach a non-private mac IP" + skip "4c insecure-default ACCEPTS the same origin" "this host is not on a tailnet" +elif [ -z "$ORIGIN_SSH" ] || ! ssh -o BatchMode=yes -o ConnectTimeout=6 "$ORIGIN_SSH" 'echo ok' >/dev/null 2>&1; then + skip "4b --secure REJECTS a non-secure plaintext origin" "no reachable tailnet origin peer (set SECURE_ORIGIN_SSH=)" + skip "4c insecure-default ACCEPTS the same origin" "no reachable tailnet origin peer" +else + secure_gw "--secure"; SC=$(remote_code) + case "$SC" in + 400) pass "4b --secure REJECTS plaintext from a non-secure origin ($ORIGIN_SSH → $MAC_TS_IP, HTTP 400)" ;; + 000) skip "4b --secure reject" "origin $ORIGIN_SSH could not reach $MAC_TS_IP:$SEC_PORT (tailnet routing/firewall)" ;; + *) fail "4b --secure reject" "expected 400 from a non-private origin, got HTTP $SC" ;; + esac + secure_gw ""; SC=$(remote_code) + case "$SC" in + 000) skip "4c insecure-default accept" "origin $ORIGIN_SSH could not reach $MAC_TS_IP:$SEC_PORT" ;; + 400) fail "4c insecure accept" "insecure default rejected a plaintext origin (400) — should accept" ;; + *) pass "4c insecure-default ACCEPTS the same plaintext origin (HTTP $SC, not 400)" ;; + esac + [ -n "$SEC_PID" ] && kill "$SEC_PID" 2>/dev/null; SEC_PID="" +fi + +# 
=========================================================================== +section "Summary" +# =========================================================================== +printf '%s\n' "${RESULTS[@]}" +FAILS=$(printf '%s\n' "${RESULTS[@]}" | grep -c '^FAIL' || true) +echo +if [ "$FAILS" -eq 0 ]; then + echo "P2P-surface smoke: no failures ✓" + exit 0 +else + echo "P2P-surface smoke: $FAILS failure(s) ✗" + exit 1 +fi diff --git a/flows/tools/x402-sign/main.go b/flows/tools/x402-sign/main.go new file mode 100644 index 00000000..b0b2565e --- /dev/null +++ b/flows/tools/x402-sign/main.go @@ -0,0 +1,77 @@ +// Command x402-sign is a host-side raw-X-PAYMENT signer for tests and smokes. +// +// It reads an x402 402 challenge JSON on stdin (the `{accepts:[...]}` body a +// seller returns, or a bare PaymentRequirements object) and a signer private +// key, and prints the base64 `X-PAYMENT` header value for accepts[0] to stdout. +// +// This is the host-side equivalent of what `obol buy dataset --join` does +// internally — it exposes the same x402.SignExactPayment primitive for the +// raw-X-PAYMENT seller paths (e.g. `obol sell inference`) that have no buyer +// CLI, so a smoke can drive a real paid request end to end. +// +// curl -s seller/402 | x402-sign --key 0x > xpay.txt +// curl seller -H "X-PAYMENT: $(cat xpay.txt)" ... 
+package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "os" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/x402" + "github.com/ethereum/go-ethereum/crypto" + x402types "github.com/x402-foundation/x402/go/types" +) + +func main() { + keyHex := flag.String("key", os.Getenv("X402_SIGN_KEY"), "signer private key (hex, 0x-optional; or env X402_SIGN_KEY)") + flag.Parse() + + if strings.TrimSpace(*keyHex) == "" { + fatal("--key (or X402_SIGN_KEY) is required") + } + key, err := crypto.HexToECDSA(strings.TrimPrefix(strings.TrimSpace(*keyHex), "0x")) + if err != nil { + fatal("bad signer key: %v", err) + } + + raw, err := io.ReadAll(io.LimitReader(os.Stdin, 1<<20)) + if err != nil { + fatal("read stdin: %v", err) + } + + req, err := firstRequirement(raw) + if err != nil { + fatal("%v", err) + } + + xpay, err := x402.SignExactPayment(key, req) + if err != nil { + fatal("sign: %v", err) + } + fmt.Println(xpay) +} + +// firstRequirement pulls accepts[0] from a 402 challenge body, or accepts a +// bare PaymentRequirements object. +func firstRequirement(raw []byte) (x402types.PaymentRequirements, error) { + var challenge struct { + Accepts []x402types.PaymentRequirements `json:"accepts"` + } + if err := json.Unmarshal(raw, &challenge); err == nil && len(challenge.Accepts) > 0 { + return challenge.Accepts[0], nil + } + var pr x402types.PaymentRequirements + if err := json.Unmarshal(raw, &pr); err == nil && pr.Scheme != "" { + return pr, nil + } + return x402types.PaymentRequirements{}, fmt.Errorf("input is neither a 402 challenge with accepts[] nor a PaymentRequirements object") +} + +func fatal(format string, a ...any) { + fmt.Fprintf(os.Stderr, "x402-sign: "+format+"\n", a...) 
+ os.Exit(1) +} diff --git a/internal/embed/infrastructure/cloudflared/values.yaml b/internal/embed/infrastructure/cloudflared/values.yaml index b832be0f..4dbe8477 100644 --- a/internal/embed/infrastructure/cloudflared/values.yaml +++ b/internal/embed/infrastructure/cloudflared/values.yaml @@ -5,7 +5,7 @@ transport: image: repository: cloudflare/cloudflared - tag: "2026.5.2@sha256:12ff5c6992a9863db4da270746af7c244bcaee49353039af8104268a18d6c4f0" + tag: "2026.6.0@sha256:ba461b8aa9c042156dbd39c38657fe7431bafa063220eab8d5330a523863da9f" metrics: address: "0.0.0.0:2000" From 5f3ebad3e68b8081fce434c56d48637707be7923 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 21:00:39 +0400 Subject: [PATCH 10/11] chore(hermes): use payments-enabled agent image --- internal/agentcrd/agent_contract_integration_test.go | 4 ++-- internal/hermes/hermes.go | 2 +- internal/serviceoffercontroller/agent_render.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/agentcrd/agent_contract_integration_test.go b/internal/agentcrd/agent_contract_integration_test.go index 146b28a2..70491a2d 100644 --- a/internal/agentcrd/agent_contract_integration_test.go +++ b/internal/agentcrd/agent_contract_integration_test.go @@ -20,8 +20,8 @@ import ( // // The unit tests in agent_test.go and serviceoffercontroller/agent_render_test.go // only prove that we *render* the `.no-bundled-skills` marker and the capped -// hermes-config keys. They do NOT prove the Hermes image -// (nousresearch/hermes-agent:v2026.6.5) actually honors them. v2026.5.28 +// hermes-config keys. They do NOT prove the currently pinned Hermes image +// actually honors them. 
v2026.5.28 // shipped the marker check on the install/CLI path only; the per-launch // sync_skills() call ignored it and re-seeded ~24 categories from the // image-baked /opt/hermes/skills source on every boot, regardless of the diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index d4a09eed..23bf2c98 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -35,7 +35,7 @@ const ( rawChartVersion = "2.0.2" // renovate: datasource=docker depName=nousresearch/hermes-agent - defaultImage = "nousresearch/hermes-agent:v2026.6.5" + defaultImage = "nousresearch/hermes-agent:main" // Use the upstream image venv instead of cloning Hermes into the PVC on // every cold start. The init container below validates the required extras // are present so image regressions fail before the gateway starts. diff --git a/internal/serviceoffercontroller/agent_render.go b/internal/serviceoffercontroller/agent_render.go index e1ade18a..2114768b 100644 --- a/internal/serviceoffercontroller/agent_render.go +++ b/internal/serviceoffercontroller/agent_render.go @@ -27,7 +27,7 @@ const ( hermesDataPVC = "hermes-data" hermesAPIPath = "/health" // renovate: datasource=docker depName=nousresearch/hermes-agent - defaultHermesImage = "nousresearch/hermes-agent:v2026.6.5" + defaultHermesImage = "nousresearch/hermes-agent:main" ) // agentLabels returns the standard label set we attach to every primitive From 5c52e0021c86bea1ea07c78d7a958725f9f645b9 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Tue, 16 Jun 2026 21:06:17 +0400 Subject: [PATCH 11/11] fix(hermes): pin payments-enabled image digest --- internal/hermes/hermes.go | 2 +- internal/serviceoffercontroller/agent_render.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index 23bf2c98..2a473471 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -35,7 +35,7 @@ const ( 
rawChartVersion = "2.0.2" // renovate: datasource=docker depName=nousresearch/hermes-agent - defaultImage = "nousresearch/hermes-agent:main" + defaultImage = "nousresearch/hermes-agent:main@sha256:e9f2892b626468d2a65abeae9f94ec0a71872d7d9643906b956ab29c9bf328a9" // Use the upstream image venv instead of cloning Hermes into the PVC on // every cold start. The init container below validates the required extras // are present so image regressions fail before the gateway starts. diff --git a/internal/serviceoffercontroller/agent_render.go b/internal/serviceoffercontroller/agent_render.go index 2114768b..8392ac40 100644 --- a/internal/serviceoffercontroller/agent_render.go +++ b/internal/serviceoffercontroller/agent_render.go @@ -27,7 +27,7 @@ const ( hermesDataPVC = "hermes-data" hermesAPIPath = "/health" // renovate: datasource=docker depName=nousresearch/hermes-agent - defaultHermesImage = "nousresearch/hermes-agent:main" + defaultHermesImage = "nousresearch/hermes-agent:main@sha256:e9f2892b626468d2a65abeae9f94ec0a71872d7d9643906b956ab29c9bf328a9" ) // agentLabels returns the standard label set we attach to every primitive