diff --git a/.codex/hooks.json b/.codex/hooks.json
new file mode 100644
index 00000000..73c79a35
--- /dev/null
+++ b/.codex/hooks.json
@@ -0,0 +1,37 @@
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "startup|resume",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "/usr/bin/python3 \"$(git rev-parse --show-toplevel)/.codex/hooks/workspace_context.py\"",
+            "statusMessage": "Loading Obol Stack bundle context"
+          }
+        ]
+      }
+    ],
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "/usr/bin/python3 \"$(git rev-parse --show-toplevel)/.codex/hooks/workspace_context.py\""
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "/usr/bin/python3 \"$(git rev-parse --show-toplevel)/.codex/hooks/stop_spec_sync.py\"",
+            "timeout": 30
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.codex/hooks/stop_spec_sync.py b/.codex/hooks/stop_spec_sync.py
new file mode 100644
index 00000000..a3f0ea59
--- /dev/null
+++ b/.codex/hooks/stop_spec_sync.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""Stop hook: veto ending the turn when spec-impacting code changed without
+a matching update to the canonical root spec bundle (see CONTRIBUTING.md)."""
+
+import json
+import os
+import subprocess
+import sys
+from typing import Iterable
+
+
+# Root-level files/dirs that form the canonical spec bundle; a change to
+# any of these satisfies the gate enforced by main() below.
+CANONICAL_PREFIXES = (
+    "SPEC.md",
+    "ARCHITECTURE.md",
+    "BEHAVIORS_AND_EXPECTATIONS.md",
+    "CONTRIBUTING.md",
+    "features/",
+    "docs/adr/",
+)
+
+# Source trees whose changes are treated as spec-impacting; mirrors the
+# list in CONTRIBUTING.md "Documentation Update Rules".
+SPEC_IMPACT_PREFIXES = (
+    "cmd/obol/",
+    "internal/stack/",
+    "internal/model/",
+    "internal/network/",
+    "internal/openclaw/",
+    "internal/agent/",
+    "internal/x402/",
+    "internal/tunnel/",
+    "internal/erc8004/",
+    "internal/inference/",
+    "internal/embed/infrastructure/",
+    "internal/embed/skills/",
+    "internal/app/",
+    "internal/schemas/",
+)
+
+
+def git_root(cwd: str) -> str:
+    """Return the git toplevel for *cwd*.
+
+    Raises subprocess.CalledProcessError (check=True) when *cwd* is not
+    inside a git checkout.
+    """
+    result = subprocess.run(
+        ["git", "rev-parse", "--show-toplevel"],
+        cwd=cwd,
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return result.stdout.strip()
+
+
+def git_lines(root: str, args: list[str]) -> list[str]:
+    """Run ``git <args>`` in *root*; return non-empty, stripped output lines."""
+    result = subprocess.run(
+        ["git", *args],
+        cwd=root,
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+
return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+
+
+def matches(path: str, prefixes: Iterable[str]) -> bool:
+    """Return True when *path* equals one of *prefixes* or lives under one."""
+    return any(path == prefix or path.startswith(prefix) for prefix in prefixes)
+
+
+def main() -> int:
+    """Read the Stop-hook payload from stdin and write a JSON verdict to stdout.
+
+    The stop is vetoed only when spec-impacting paths changed in the working
+    tree AND no canonical bundle file was touched alongside them.
+    """
+    payload = json.load(sys.stdin)
+    cwd = payload.get("cwd") or os.getcwd()
+
+    try:
+        root = git_root(cwd)
+    except Exception:
+        # Not inside a git checkout: nothing to enforce, allow the stop.
+        json.dump({"continue": True}, sys.stdout)
+        return 0
+
+    # Collect unstaged, staged, and untracked paths, respectively.
+    changed = set()
+    for args in (
+        ["diff", "--name-only"],
+        ["diff", "--name-only", "--cached"],
+        ["ls-files", "--others", "--exclude-standard"],
+    ):
+        try:
+            changed.update(git_lines(root, args))
+        except subprocess.CalledProcessError:
+            # Best effort: a failing git query simply contributes no paths.
+            pass
+
+    impacting = sorted(path for path in changed if matches(path, SPEC_IMPACT_PREFIXES))
+    canonical = sorted(path for path in changed if matches(path, CANONICAL_PREFIXES))
+
+    # Allow the stop when nothing spec-impacting changed, or when the
+    # canonical bundle was updated alongside the code.
+    if not impacting or canonical:
+        json.dump({"continue": True}, sys.stdout)
+        return 0
+
+    # Show at most four offending paths in the operator-facing reason.
+    preview = ", ".join(impacting[:4])
+    if len(impacting) > 4:
+        preview = f"{preview}, +{len(impacting) - 4} more"
+
+    reason = (
+        "Spec-impacting changes were detected in "
+        f"{preview}. Update the canonical root bundle "
+        "(SPEC.md, ARCHITECTURE.md, BEHAVIORS_AND_EXPECTATIONS.md, "
+        "CONTRIBUTING.md, features/, or docs/adr/) before ending the turn, "
+        "or explicitly explain why no spec change is required."
+    )
+
+    # NOTE(review): when the payload reports stop_hook_active, this emits a
+    # terminal "continue: False" with a systemMessage instead of another
+    # "block" decision -- presumably a loop guard so the hook cannot block
+    # itself repeatedly; confirm against the host's hook semantics.
+    if payload.get("stop_hook_active"):
+        json.dump({"continue": False, "systemMessage": reason}, sys.stdout)
+        return 0
+
+    json.dump({"decision": "block", "reason": reason}, sys.stdout)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/.codex/hooks/workspace_context.py b/.codex/hooks/workspace_context.py
new file mode 100644
index 00000000..d3ff2a75
--- /dev/null
+++ b/.codex/hooks/workspace_context.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""SessionStart/UserPromptSubmit hook: inject repo conventions as context."""
+
+import json
+import os
+import subprocess
+import sys
+
+
+def git_root(cwd: str) -> str:
+    """Best-effort repository toplevel; falls back to *cwd* when git fails."""
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "--show-toplevel"],
+            cwd=cwd,
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+        return result.stdout.strip()
+    except Exception:
+        return cwd
+
+
+def main() -> int:
+    """Write hookSpecificOutput JSON whose additionalContext restates repo rules."""
+    payload = json.load(sys.stdin)
+    cwd = payload.get("cwd") or os.getcwd()
+    root = git_root(cwd)
+    # Echo the triggering event name back so the host attributes the context
+    # to the right hook event; default covers a missing field.
+    event_name = payload.get("hook_event_name") or "SessionStart"
+
+    context = "\n".join(
+        [
+            f"Repository conventions for {os.path.basename(root)}:",
+            "- PR288 is the behavioral baseline for the canonical bundle.",
+            "- The canonical bundle lives at repo root: SPEC.md, ARCHITECTURE.md, BEHAVIORS_AND_EXPECTATIONS.md, CONTRIBUTING.md, features/, docs/adr/.",
+            "- Actor priority is local operator, then agent developer, then remote buyer.",
+            "- Spec-impacting code changes must update the root bundle in the same turn.",
+            "- Future work belongs in explicit phase sections and ADR follow-ups, not ad hoc plan files.",
+        ]
+    )
+
+    json.dump(
+        {
+            "hookSpecificOutput": {
+                "hookEventName": event_name,
+                "additionalContext": context,
+            }
+        },
+        sys.stdout,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
new file mode 100644
index 00000000..af196b4f
--- /dev/null
+++ b/ARCHITECTURE.md
@@ -0,0 +1,346 @@
+# Obol Stack Architecture
+
+**Version**: 1.0.0-pr288
+**Status**: Living document
+**Last Updated**:
2026-03-29 + +This document is the structural companion to [SPEC.md](SPEC.md). It focuses on component boundaries, data flow, deployment topology, and trust boundaries for the PR `#288` baseline. + +--- + +## Table of Contents + +1. [Design Philosophy](#1-design-philosophy) +2. [Component Diagrams](#2-component-diagrams) +3. [Module Decomposition](#3-module-decomposition) +4. [Data Flow Diagrams](#4-data-flow-diagrams) +5. [Storage Architecture](#5-storage-architecture) +6. [Deployment Model](#6-deployment-model) +7. [Network Topology](#7-network-topology) +8. [Security Architecture](#8-security-architecture) + +--- + +## 1. Design Philosophy + +Obol Stack is built around these principles: + +1. **Local-first sovereignty**: the operator machine remains the source of truth for cluster, wallet, and skill state. +2. **Single operator entry point**: the `obol` CLI is the primary control surface for lifecycle, routing, applications, and monetization. +3. **Centralized protocol translation**: LiteLLM centralizes model routing, Traefik centralizes HTTP routing, and eRPC centralizes chain access. +4. **Bounded trust**: payment execution, signing, and public routing are split into separate components with different privileges. +5. **Phased extensibility**: experimental or not-yet-fully-integrated surfaces are explicit phase follow-ups rather than hidden assumptions. + +System constraints are defined in [SPEC.md](SPEC.md#15-system-constraints). + +--- + +## 2. 
Component Diagrams + +### 2.1 C4 Context Diagram + +```mermaid +C4Context + title Obol Stack - System Context + + Person(operator, "Local Operator", "Starts the stack, manages services, inspects health") + Person(agent_dev, "Agent Developer", "Deploys and tunes OpenClaw instances and skills") + Person(remote_buyer, "Remote Buyer", "Pays for public services or remote models") + + System(obol, "Obol Stack", "Local-first agent and infrastructure platform") + + System_Ext(ollama, "Ollama", "Local host model runtime") + System_Ext(cloud_llm, "Cloud LLM APIs", "Anthropic and OpenAI providers") + System_Ext(chainlist, "ChainList", "Public RPC discovery") + System_Ext(facilitator, "x402 Facilitator", "Payment verification and settlement") + System_Ext(cloudflare, "Cloudflare", "Tunnel control plane and edge") + System_Ext(chains, "EVM Chains", "Payment and registration settlement") + System_Ext(charts, "ArtifactHub / OCI / Helm Repos", "Managed application sources") + + Rel(operator, obol, "CLI + browser") + Rel(agent_dev, obol, "CLI + embedded skills") + Rel(remote_buyer, obol, "HTTPS paid requests") + Rel(obol, ollama, "HTTP") + Rel(obol, cloud_llm, "HTTPS") + Rel(obol, chainlist, "HTTPS") + Rel(obol, facilitator, "HTTPS") + Rel(obol, cloudflare, "Browser auth, API, tunnel traffic") + Rel(obol, chains, "JSON-RPC via eRPC") + Rel(obol, charts, "HTTPS / OCI pull") +``` + +### 2.2 C4 Container Diagram + +```mermaid +C4Container + title Obol Stack - Container Diagram + + Person(operator, "Local Operator") + Person(remote_buyer, "Remote Buyer") + + System_Boundary(host, "Operator Machine") { + Container(cli, "obol CLI", "Go", "Lifecycle, routing, apps, monetization") + Container_Boundary(cluster, "Local k3d/k3s Cluster") { + Container(traefik, "Traefik", "Gateway API", "Ingress and route dispatch") + Container(cloudflared, "cloudflared", "Cloudflare Tunnel", "Public ingress bridge") + Container(litellm, "LiteLLM", "Python", "OpenAI-compatible model gateway") + 
Container(buyer, "x402-buyer", "Go sidecar", "Attaches pre-signed payments") + Container(erpc, "eRPC", "Go", "Blockchain RPC gateway") + Container(verifier, "x402-verifier", "Go", "ForwardAuth payment checks") + Container(agent, "OpenClaw", "OpenClaw runtime", "Agent instances and skills") + Container(frontend, "Frontend", "React app", "Operator dashboard") + ContainerDb(prom, "Prometheus", "Monitoring stack", "Metrics and scrape targets") + } + } + + System_Ext(ollama, "Ollama") + System_Ext(facilitator, "x402 Facilitator") + System_Ext(chain, "EVM Chain") + + Rel(operator, cli, "Runs commands") + Rel(operator, traefik, "Uses obol.stack") + Rel(remote_buyer, cloudflared, "HTTPS") + Rel(cloudflared, traefik, "HTTP") + Rel(traefik, frontend, "Route /") + Rel(traefik, erpc, "Route /rpc") + Rel(traefik, verifier, "ForwardAuth /verify") + Rel(traefik, litellm, "Route public services after auth") + Rel(litellm, buyer, "paid/* route") + Rel(litellm, ollama, "ollama/* models") + Rel(verifier, facilitator, "Verify payment") + Rel(erpc, chain, "JSON-RPC") + Rel(agent, erpc, "Chain queries") + Rel(agent, verifier, "Pricing config and registration side effects") + Rel(prom, buyer, "Scrapes /metrics") +``` + +### 2.3 Component Diagram: Sell-Side Control Loop + +```mermaid +C4Component + title Sell-Side Control Loop + + Component(cli, "sell commands", "Go", "Creates ServiceOffers and local gateways") + Component(offer, "ServiceOffer CRD", "Kubernetes API", "Declarative sell contract") + Component(mon, "monetize.py", "Python skill", "Skill-driven reconcile loop") + Component(ver, "x402-verifier", "Go", "Payment gate") + Component(route, "HTTPRoute / Middleware", "Gateway API + Traefik", "Traffic publication") + Component(reg, "registration publisher", "Python skill", "Generates well-known document and optional on-chain registration") + + Rel(cli, offer, "Create / update") + Rel(offer, mon, "Read / status patch") + Rel(mon, ver, "Update pricing inputs") + Rel(mon, route, "Create 
route + middleware") + Rel(mon, reg, "Publish registration") +``` + +--- + +## 3. Module Decomposition + +| Module | Responsibility | SPEC Reference | +|--------|----------------|----------------| +| `internal/stack` | Backend lifecycle and default infrastructure deployment | Section 3.1 | +| `internal/model` | Central LiteLLM routing and provider patching | Section 3.2 | +| `internal/network` | eRPC, local network deployments, public RPC management | Section 3.3 | +| `internal/openclaw` | OpenClaw overlays, tokens, skills, wallets, dashboards | Section 3.4 | +| `internal/agent` | Elevation of the default agent with monetization powers | Section 3.4 | +| `cmd/obol/sell.go` + `internal/x402` | Sell-side operator and verifier paths | Section 3.5 | +| `internal/x402/buyer` | Buy-side sidecar runtime | Section 3.6 | +| `internal/tunnel` | Quick and DNS tunnel lifecycle | Section 3.7 | +| `internal/app` | Managed Helm-chart workloads | Section 3.8 | + +--- + +## 4. Data Flow Diagrams + +### 4.1 Stack Startup + +```mermaid +sequenceDiagram + participant O as Operator + participant CLI as obol CLI + participant B as Backend + participant H as Helmfile + participant L as LiteLLM + participant OC as OpenClaw + participant A as agent.Init + participant T as Tunnel + + O->>CLI: obol stack up + CLI->>B: Up() + B-->>CLI: kubeconfig + CLI->>H: sync defaults + H-->>CLI: baseline infrastructure ready + CLI->>L: autoConfigureLLM() + CLI->>OC: SetupDefault() + CLI->>A: patch RBAC + HEARTBEAT.md + CLI->>T: start only if persistent DNS tunnel exists + CLI-->>O: obol.stack ready +``` + +### 4.2 Sell-Side Publication + +```mermaid +sequenceDiagram + participant O as Operator + participant CLI as sell command + participant K as Kubernetes API + participant M as monetize.py + participant V as x402-verifier + participant G as Traefik / Gateway API + participant R as Registry publisher + + O->>CLI: obol sell http ... 
+ CLI->>K: create ServiceOffer + M->>K: read ServiceOffer + M->>K: patch status ModelReady / UpstreamHealthy + M->>V: publish pricing route + M->>G: create Middleware + HTTPRoute + M->>R: publish agent-registration.json + M->>K: patch Ready +``` + +### 4.3 Buy-Side Request + +```mermaid +sequenceDiagram + participant A as Agent + participant S as Remote Signer + participant C as ConfigMaps + participant L as LiteLLM + participant B as x402-buyer + participant Seller as Remote Seller + participant F as Facilitator + + A->>Seller: probe without payment + Seller-->>A: 402 pricing + A->>S: pre-sign N auths + A->>C: store upstream config + auth pool + A->>L: request model paid/ + L->>B: forward request + B->>Seller: request without payment + Seller-->>B: 402 + B->>Seller: retry with X-PAYMENT + Seller->>F: verify payment + F-->>Seller: verification result + Seller-->>B: 200 response + B-->>L: inference result +``` + +--- + +## 5. Storage Architecture + +### 5.1 Overview + +State is intentionally split between: +- local XDG filesystem state managed by the CLI +- Kubernetes resources in the local cluster +- external chain state and facilitator state that the stack references but does not own + +### 5.2 Schema Summary + +| Store | Entity | Key Fields | Purpose | +|-------|--------|-----------|---------| +| Local config dir | stack metadata | `.stack-id`, `.stack-backend`, `kubeconfig.yaml` | Stack identity and runtime targeting | +| Local config dir | deployment config | `applications//`, `networks//` | Declarative deployment inputs | +| Kubernetes API | `ServiceOffer` | `spec.upstream`, `spec.payment`, `status.conditions` | Sell-side contract and reconcile status | +| Kubernetes ConfigMaps | routing and pricing state | LiteLLM, eRPC, x402, buyer config | Dynamic runtime routing | +| Kubernetes Secrets | provider creds and tunnel token | API keys, tunnel token | Sensitive runtime inputs | + +--- + +## 6. 
Deployment Model + +### 6.1 Deployment Diagram + +```mermaid +graph TD + subgraph "Operator Host" + CLI["obol CLI"] + XDG["XDG config/data/state"] + OLLAMA["Ollama (optional)"] + end + + subgraph "Local k3d / k3s Cluster" + TRAEFIK["Traefik + Gateway"] + CLOUDFLARED["cloudflared"] + LLM["LiteLLM + x402-buyer"] + ERPC["eRPC"] + X402["x402-verifier"] + OCA["OpenClaw / obol-agent"] + FE["Frontend"] + MON["Monitoring"] + end + + CLI --> XDG + CLI --> TRAEFIK + CLI --> OCA + CLI --> ERPC + LLM --> OLLAMA + CLOUDFLARED --> TRAEFIK + TRAEFIK --> FE + TRAEFIK --> ERPC + TRAEFIK --> X402 + TRAEFIK --> LLM +``` + +### 6.2 Infrastructure Requirements + +| Resource | Requirement | Notes | +|----------|-------------|-------| +| Local runtime | Docker for `k3d` or direct host support for `k3s` | Backend-specific prerequisites | +| Filesystem | Writable XDG config/data/state dirs | Required for persistent stack state | +| Network | Local loopback plus outbound HTTPS | Needed for providers, ChainList, facilitator, Cloudflare | +| Optional Cloudflare account | Required only for persistent DNS tunnel | Quick tunnel path can remain local-first | + +--- + +## 7. Network Topology + +- `obol.stack` is the local operator hostname. +- Frontend and eRPC are intentionally bound behind `hostnames: ["obol.stack"]`. +- Public service routes flow through Cloudflare tunnel to Traefik, then through x402 ForwardAuth before reaching an upstream. +- Buyer-side `paid/*` traffic stays inside the cluster until the sidecar contacts a remote seller. +- Registration JSON is intentionally public and bypasses ForwardAuth. + +--- + +## 8. 
Security Architecture + +### 8.1 Trust Boundaries + +Trust boundaries exist between: +- operator host and local cluster +- local-only routes and public tunnel routes +- remote signer and buyer sidecar +- x402 verification and upstream service execution +- local filesystem state and external chain/facilitator systems + +### 8.2 Authentication Flow + +```mermaid +sequenceDiagram + participant Buyer as Remote Buyer + participant Traefik as Traefik + participant Verifier as x402-verifier + participant Fac as Facilitator + participant Upstream as Service + + Buyer->>Traefik: HTTPS request + Traefik->>Verifier: ForwardAuth /verify + Verifier->>Fac: validate X-PAYMENT + Fac-->>Verifier: result + Verifier-->>Traefik: 200 or 402 + Traefik->>Upstream: only after 200 +``` + +### 8.3 Data Encryption + +| Data | At Rest | In Transit | +|------|---------|-----------| +| Provider API keys | Kubernetes Secret | HTTPS to provider APIs | +| Wallet and backup material | Local data dir, optional encrypted backup | Local filesystem or remote signer API | +| Tunnel traffic | Cloudflare-managed | HTTPS / QUIC | +| Payment proofs | Not persisted by the sidecar beyond auth pool state | HTTPS to seller / facilitator | diff --git a/BEHAVIORS_AND_EXPECTATIONS.md b/BEHAVIORS_AND_EXPECTATIONS.md new file mode 100644 index 00000000..fa75e04f --- /dev/null +++ b/BEHAVIORS_AND_EXPECTATIONS.md @@ -0,0 +1,308 @@ +# Obol Stack - Behaviors and Expectations + +**Version**: 1.0.0-pr288 +**Status**: Living document +**Last Updated**: 2026-03-29 + +This document defines the behavioral contract for Obol Stack on the PR `#288` baseline. Every behavior here maps to current or planned BDD scenarios in [features/](features/). + +--- + +## Table of Contents + +1. [Introduction](#1-introduction) +2. [Desired Behaviors](#2-desired-behaviors) +3. [Undesired Behaviors](#3-undesired-behaviors) +4. [Edge Cases](#4-edge-cases) +5. [Performance Expectations](#5-performance-expectations) +6. 
[Guardrail Definitions](#6-guardrail-definitions) + +--- + +## 1. Introduction + +### 1.1 Purpose + +This is the behavioral contract for Obol Stack. It defines what the current branch should do, what it must not do, and how it should degrade when optional dependencies are absent. + +### 1.2 Reading Guide + +Behavior entries use: +- **Trigger**: what starts the behavior +- **Expected**: what the system should do +- **Rationale**: why the behavior matters + +Cross-references use `SPEC SS X.Y`, pointing to [SPEC.md](SPEC.md). + +### 1.3 Behavioral Priorities + +The behavior model is ordered by actor priority: +1. local operator +2. agent developer +3. remote buyer + +When tradeoffs conflict, operator safety and recoverability win. + +--- + +## 2. Desired Behaviors + +### 2.1 Stack Lifecycle + +> SPEC SS 3.1 + +#### B-2.1.1: Stack initialization persists a stable cluster identity + +**Trigger**: The operator runs `obol stack init`. +**Expected**: The CLI writes a stack ID, backend selection, and rendered defaults into the config directory. If `--force` is used against an existing stack, the stack ID is preserved unless the operator explicitly purges the stack. +**Rationale**: Persistent identity keeps local state, directory naming, and LiteLLM master-key derivation stable. + +#### B-2.1.2: Stack startup deploys defaults before optional public exposure + +**Trigger**: The operator runs `obol stack up`. +**Expected**: The cluster starts, baseline infrastructure is deployed through Helmfile, LiteLLM is auto-configured when possible, the default OpenClaw instance is prepared, and the tunnel remains dormant unless a persistent DNS tunnel was previously provisioned. +**Rationale**: Local operation is the primary mode. Public exposure must not be a prerequisite for core startup. + +#### B-2.1.3: Purge preserves data unless the operator explicitly requests destruction + +**Trigger**: The operator runs `obol stack purge`. 
+**Expected**: Config is removed and the cluster is destroyed, but persistent data survives unless `--force` is used. +**Rationale**: Wallets and agent state are valuable and must not be destroyed by the ordinary cleanup path. + +### 2.2 LLM Routing + +> SPEC SS 3.2 + +#### B-2.2.1: LiteLLM acts as the central operator-facing model gateway + +**Trigger**: An OpenClaw instance or operator-configured route needs model access. +**Expected**: Requests are routed through LiteLLM rather than per-instance ad hoc provider wiring. +**Rationale**: Central routing reduces duplication, keeps provider config consistent, and enables the static buy-side `paid/*` namespace. + +#### B-2.2.2: Model auto-configuration is best-effort, not mandatory + +**Trigger**: `stack up` runs on a host with or without Ollama or cloud credentials. +**Expected**: When models or credentials are discoverable they are applied automatically; otherwise the stack still starts and the operator can configure providers later. +**Rationale**: Startup should remain recoverable even when optional provider dependencies are absent. + +#### B-2.2.3: Custom OpenAI-compatible endpoints are validated before they are added + +**Trigger**: The operator runs `obol model setup custom ...`. +**Expected**: The endpoint is validated before it becomes part of the LiteLLM route set. +**Rationale**: Broken model entries create confusing downstream failures for operators and agents. + +### 2.3 Network Management + +> SPEC SS 3.3 + +#### B-2.3.1: Local installable networks and remote RPC aliases remain distinct + +**Trigger**: The operator uses `obol network install`, `list`, `add`, or `remove`. +**Expected**: Local deployable networks come only from embedded network bundles, while remote RPC aliases are resolved from the ChainList alias map and public RPC discovery flow. +**Rationale**: Treating these as separate prevents invalid support claims and operator confusion. 
+ +#### B-2.3.2: Public RPC writes are blocked by default + +**Trigger**: The operator adds a remote chain without `--allow-writes`. +**Expected**: eRPC write methods remain blocked on that chain. +**Rationale**: Read-only defaults reduce the chance of accidental live transactions. + +#### B-2.3.3: Network status reflects current command semantics, not idealized per-deployment views + +**Trigger**: The operator runs `obol network status`. +**Expected**: The command reports current eRPC gateway health and upstream counts; it does not pretend to be a per-deployment local-node dashboard unless the implementation adds that contract. +**Rationale**: The spec must match the current CLI surface exactly. + +### 2.4 OpenClaw Runtime + +> SPEC SS 3.4 + +#### B-2.4.1: The default OpenClaw instance is the canonical elevated agent runtime + +**Trigger**: `stack up` completes on a branch where the default agent can be configured. +**Expected**: The `obol-agent` instance is created or re-synced and then elevated via RBAC patching and heartbeat injection. +**Rationale**: Monetization and cluster-aware agent behavior rely on a single canonical elevated runtime. + +#### B-2.4.2: Additional OpenClaw instances remain operator-managed deployments + +**Trigger**: The operator uses `obol openclaw onboard`, `sync`, `delete`, `dashboard`, `token`, `skills`, or wallet flows. +**Expected**: Instance selection, deployment directories, dashboard URLs, skills, and tokens are all managed through the CLI and persisted under managed config directories. +**Rationale**: OpenClaw instances are part of the platform control surface, not transient ad hoc workloads. + +### 2.5 Sell-Side Monetization + +> SPEC SS 3.5 + +#### B-2.5.1: Sell-side resources are created in the namespace the operator chose + +**Trigger**: The operator runs `obol sell http --namespace ...`. +**Expected**: The resulting `ServiceOffer` is created in `` and references the chosen upstream namespace explicitly. 
+**Rationale**: Namespace is an operator intent field and cannot be silently rewritten by the implementation or docs. + +#### B-2.5.2: Reconciliation advances through six explicit stages + +**Trigger**: A `ServiceOffer` is created or updated. +**Expected**: The offer advances through `ModelReady`, `UpstreamHealthy`, `PaymentGateReady`, `RoutePublished`, `Registered`, and `Ready`, with status updates visible to operators. +**Rationale**: Operators need a clear progress model for debugging sell-side failures. + +#### B-2.5.3: Registration failure degrades gracefully when possible + +**Trigger**: Registration is enabled but signer, gas, or RPC prerequisites are missing. +**Expected**: The service can remain payment-gated and publicly described with `Registered=True` and an `OffChainOnly` reason when that degraded path applies. +**Rationale**: Public discovery should not be all-or-nothing when the on-chain mint path is temporarily unavailable. + +#### B-2.5.4: Probe verifies the payment gate without consuming buyer budget + +**Trigger**: The operator runs `obol sell probe -n `. +**Expected**: The command sends an unauthenticated request, expects a `402` pricing response, and confirms that the route is live and payment-gated. +**Rationale**: Operators need a cheap verification path before involving a real buyer flow. + +### 2.6 Buy-Side Payments + +> SPEC SS 3.6 + +#### B-2.6.1: Paid model routing uses a static public namespace + +**Trigger**: An agent requests `paid/` through LiteLLM. +**Expected**: LiteLLM resolves the request to the buyer sidecar without requiring dynamic model-list rewrites for every purchased upstream. +**Rationale**: Static public naming keeps the buy-side integration simple and operationally stable. + +#### B-2.6.2: Buyer runtime spending is bounded by the pre-signed auth pool + +**Trigger**: The buyer sidecar serves paid requests. +**Expected**: It consumes only pre-signed authorizations and cannot mint new spend authority at runtime. 
+**Rationale**: This is the key safety property of the buy-side design. + +#### B-2.6.3: Unmapped paid models fail explicitly + +**Trigger**: A request arrives for `paid/` that does not map to a purchased upstream. +**Expected**: The request fails with a clear not-found style response rather than silently drifting to another provider. +**Rationale**: Silent fallback would break spending and trust assumptions. + +### 2.7 Tunnel, Discovery, Frontend, and Monitoring + +> SPEC SS 3.7 + +#### B-2.7.1: Quick tunnel activation is demand-driven + +**Trigger**: The stack starts without a pre-provisioned DNS tunnel. +**Expected**: Cloudflared remains dormant until a sell flow requires public exposure or the operator starts it manually. +**Rationale**: The operator should not pay the complexity cost of public exposure before it is needed. + +#### B-2.7.2: Public discovery metadata reflects the current tunnel URL + +**Trigger**: A tunnel URL becomes available or changes. +**Expected**: The stack updates `AGENT_BASE_URL` and syncs frontend-readable configuration so generated registration documents point at the current public origin. +**Rationale**: Discovery documents must describe reachable public endpoints. + +#### B-2.7.3: Frontend remains local-only unless the architecture changes deliberately + +**Trigger**: The operator accesses the dashboard. +**Expected**: The frontend is served under `obol.stack` and is not exposed by the public tunnel path. +**Rationale**: The frontend is an operator control surface, not a public buyer surface. + +### 2.8 Managed Applications and Supporting Operations + +> SPEC SS 3.8 + +#### B-2.8.1: Managed applications behave like named, persistent deployments + +**Trigger**: The operator runs `obol app install`, `sync`, `list`, or `delete`. +**Expected**: Chart references are resolved, persisted under managed config paths, and deployed or removed through explicit CLI flows. 
+**Rationale**: Application management should match the rest of the stack’s declarative local-state model. + +--- + +## 3. Undesired Behaviors + +### 3.1 Exposure and Safety + +#### U-3.1.1: Local-only operator routes are reachable through the public tunnel + +**Trigger**: Route configuration removes or bypasses `obol.stack` hostname restrictions for frontend, eRPC, or monitoring. +**Expected**: The change is rejected or treated as a critical regression. +**Risk**: Public exposure of operator-only surfaces weakens the main trust boundary of the stack. + +#### U-3.1.2: Remote RPC write capability is enabled by default + +**Trigger**: A newly added public RPC upstream forwards write methods without explicit opt-in. +**Expected**: Write methods remain blocked unless the operator used `--allow-writes`. +**Risk**: Unintended live-chain transactions become possible through a read-mostly operator flow. + +#### U-3.1.3: Buyer runtime receives live signing authority + +**Trigger**: Runtime changes allow the sidecar to contact the remote signer or mint new spend approvals. +**Expected**: The runtime remains restricted to pre-signed authorizations only. +**Risk**: The bounded-spend trust model collapses. + +### 3.2 Contract Drift + +#### U-3.2.1: Documentation claims operator support that the CLI does not ship + +**Trigger**: Specs or guides describe commands, flags, or supported chains that do not exist in the branch. +**Expected**: The canonical bundle is corrected to the current code surface, with future work moved into phased sections. +**Risk**: Operators make invalid assumptions and the spec stops being implementation-ready. + +--- + +## 4. Edge Cases + +### 4.1 Startup and Operator Recovery + +#### E-4.1.1: No local model provider is immediately available + +**Scenario**: The stack starts without Ollama models and without imported cloud credentials. 
+**Expected Handling**: Core infrastructure still starts; OpenClaw setup may be skipped or remain partially configured until the operator runs explicit provider setup. +**Rationale**: Provider absence should not destroy the local operator path. + +#### E-4.1.2: Helmfile sync fails during startup + +**Scenario**: Default infrastructure deployment fails mid-startup. +**Expected Handling**: The stack automatically runs a cleanup-oriented shutdown path. +**Rationale**: A half-started cluster is more dangerous than a failed startup. + +### 4.2 Payments and Registration + +#### E-4.2.1: Registration wallet has no gas + +**Scenario**: A service is ready for publication but the registration wallet cannot submit an on-chain transaction. +**Expected Handling**: The service degrades to `OffChainOnly` rather than disappearing entirely. +**Rationale**: Discovery metadata is still valuable even when chain settlement is temporarily blocked. + +#### E-4.2.2: Buyer auth pool is exhausted + +**Scenario**: A purchased upstream has no remaining signed authorizations. +**Expected Handling**: Requests fail explicitly until the operator or agent refills the pool. +**Rationale**: Silent fallback would break billing and hide a capacity problem. + +### 4.3 Selection Ambiguity + +#### E-4.3.1: Multiple deployments of the same type exist + +**Scenario**: The operator has multiple OpenClaw instances, app deployments, or network deployments. +**Expected Handling**: Commands auto-select only when there is exactly one unambiguous target; otherwise they require the operator to specify the target. +**Rationale**: Ambiguous automation is more dangerous than an extra required argument. + +--- + +## 5. 
Performance Expectations + +| Behavior | Target | Measurement | Degradation Handling | +|----------|--------|-------------|---------------------| +| ChainList discovery | bounded by 15s timeout | `internal/network/chainlist.go` timeout | operator retries with custom endpoint | +| Tunnel startup | bounded by 30s rollout wait | `tunnel.EnsureRunning()` rollout status | local path remains available | +| LiteLLM restart | bounded by 90s rollout wait | `model.RestartLiteLLM()` rollout status | operator reruns provider setup or inspects deployment | +| Buyer metrics visibility | 30s scrape interval | PodMonitor config | stale metrics do not block inference | + +--- + +## 6. Guardrail Definitions + +### 6.1 Non-Negotiable Guardrails + +| Guardrail | Rule | Enforcement | Violation Response | +|-----------|------|-------------|-------------------| +| Local-only surfaces | Frontend, eRPC, and monitoring stay behind `obol.stack` hostname restrictions | route templates, review, spec bundle | treat as critical regression | +| Static paid namespace | Buy-side public names remain `paid/` | LiteLLM config model, buyer sidecar routing | reject drifting implementations | +| Namespace fidelity | `sell http --namespace ` creates the `ServiceOffer` in `` | CLI manifest generation | treat mismatched docs or code as bug | +| Phase discipline | future behavior must live in phased sections or ADR follow-ups | canonical root-level bundle and hooks | block or fix before merge | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fad91f69..4b2458ea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,90 +1,171 @@ -# Contributing to Blockchain Helm Charts +# Contributing to Obol Stack -Thank you for considering contributing to this project! This document provides guidelines to help you contribute effectively. +This document defines the non-negotiable contribution rules for the consolidated Obol Stack codebase and spec bundle. -## Getting Started +--- -### Prerequisites +## 1. 
Canonical Documents -- Kubernetes knowledge -- Helm chart development experience -- Understanding of the specific blockchain client you're creating/modifying a chart for +The canonical specification bundle lives at repo root: -### Development Environment +- `SPEC.md` +- `ARCHITECTURE.md` +- `BEHAVIORS_AND_EXPECTATIONS.md` +- `CONTRIBUTING.md` +- `features/` +- `docs/adr/` -1. Install [Helm](https://helm.sh/docs/intro/install/) -2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/) -3. Set up a Kubernetes environment (minikube, kind, or a cloud provider) +Supporting material in `docs/guides/` can remain useful, but it is **not** authoritative once the root-level bundle covers the same topic. +Planning or architecture notes must be folded into `SPEC.md` phase sections or `docs/adr/` instead of living as parallel sources of truth under `docs/plans/` or `plans/`. -## Chart Development Guidelines +If code and docs disagree: +- code is the temporary source of truth +- the root-level bundle must be updated in the same change or immediately after -### Chart Structure +--- -Each chart should follow this structure: -``` -charts// -├── Chart.yaml -├── values.yaml -├── templates/ -│ ├── deployment.yaml -│ ├── service.yaml -│ ├── configmap.yaml -│ ├── secret.yaml (if needed) -│ ├── pvc.yaml (if needed) -│ └── NOTES.txt -├── OWNERS (maintainers list) -└── README.md (chart-specific documentation) -``` +## 2. Actor Priority + +When making product or UX tradeoffs, preserve this order: + +1. Local operator +2. Agent developer +3. Remote buyer + +This affects: +- defaults +- failure handling +- public exposure rules +- CLI ergonomics +- phased rollout decisions + +--- + +## 3. 
Documentation Update Rules + +Any change touching these areas is spec-impacting and must update the canonical bundle when it changes behavior: -### Requirements +- `cmd/obol/` +- `internal/stack/` +- `internal/model/` +- `internal/network/` +- `internal/openclaw/` +- `internal/agent/` +- `internal/x402/` +- `internal/x402/buyer/` +- `internal/tunnel/` +- `internal/erc8004/` +- `internal/inference/` +- `internal/embed/infrastructure/` +- `internal/embed/skills/` +- `internal/app/` +- `internal/schemas/` -- Charts must be compatible with Helm 3 -- Include comprehensive documentation -- Provide sensible defaults in values.yaml -- Include proper Kubernetes resource requests and limits -- Follow security best practices +Rules: +- describe only behavior that is actually implemented on the branch +- move future work into `Phase 2+` sections and ADR follow-ups +- do not silently broaden support claims +- do not collapse different chain domains into one “supported networks” statement -### Values.yaml +Current chain domains that must stay distinct: +- installable local networks +- eRPC remote RPC aliases +- sell-side payment chains +- ERC-8004 registration networks -- Group related values logically -- Add comments explaining the purpose of values -- Include sensible defaults that work out-of-the-box -- Provide examples for custom configurations +--- -## Pull Request Process +## 4. Feature and ADR Discipline -1. Fork the repository -2. Create a new branch for your changes -3. Make your changes following the chart development guidelines -4. Test your charts thoroughly -5. Submit a pull request -6. Address review comments +Feature files: +- live under `features/` +- start with `@bdd` +- reference both `SPEC.md` and `BEHAVIORS_AND_EXPECTATIONS.md` +- use `@phase1`, `@phase2`, etc. 
when phases matter -### Pull Request Checklist +ADRs: +- live under `docs/adr/` +- record durable architectural decisions, not transient implementation chatter +- must note the affected `SPEC.md` sections -- [ ] Chart version updated according to semantic versioning -- [ ] Chart README.md updated with any new values or changes -- [ ] Chart has been tested and verified to work -- [ ] `helm lint` passes without warnings -- [ ] `helm template` generates valid Kubernetes resources +--- -## Testing Your Chart +## 5. Development Expectations + +Baseline validation before sending a substantial code change: + +```bash +go build ./... +go test ./... +``` + +When the change touches the monetization path, strongly prefer validating one or more of: ```bash -# Lint the chart -helm lint charts/your-chart +./flows/flow-06-sell-setup.sh +./flows/flow-07-sell-verify.sh +./flows/flow-08-buy.sh +./flows/flow-10-anvil-facilitator.sh +``` -# Render the templates -helm template charts/your-chart +When the change touches embedded skills or sell-side metadata, also consider: -# Install the chart in a test environment -helm install test-release charts/your-chart --dry-run +```bash +python3 tests/skills_smoke_test.py +python3 tests/test_sell_registration_metadata.py +python3 tests/test_autoresearch_worker.py ``` -## Code of Conduct +--- + +## 6. Security and Exposure Guardrails + +Never merge a change that: +- exposes frontend, eRPC, monitoring, or similar operator surfaces to the public tunnel +- gives the buyer sidecar live signer access +- changes sell-side chain support claims without updating both CLI behavior and docs +- enables write-capable public RPC forwarding by default +- removes the `OffChainOnly` degradation path without a replacement operator-safe fallback + +--- + +## 7. Hook-Based Drift Detection + +Repo-local Codex hooks should be treated as guardrails, not as a substitute for human judgment. 
+ +Intended behavior: +- session-start hooks remind Codex that the root-level bundle is canonical +- stop hooks block or warn when spec-impacting code changed but the canonical bundle did not + +To enable Codex hooks locally: + +```toml +# ~/.codex/config.toml +[features] +codex_hooks = true +``` + +The repository hook entrypoint is: + +- `.codex/hooks.json` + +Hook scripts belong under: + +- `.codex/hooks/` + +This repository currently ships: + +- `.codex/hooks/workspace_context.py` +- `.codex/hooks/stop_spec_sync.py` + +If hooks and code ever disagree, fix the hooks or the bundle. Do not paper over the mismatch. -Please respect other contributors and maintain a positive environment for everyone. +--- -## Thank You +## 8. Pull Request Checklist -Your contributions help make this project better for everyone! +- [ ] Behavior changes are reflected in the root-level canonical bundle +- [ ] Future work is isolated into phases or ADR follow-ups +- [ ] Operator-facing claims match the actual CLI and runtime surface +- [ ] Security exposure boundaries were preserved +- [ ] Tests or flow validations were run, or the omission is explicitly stated diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 00000000..fb875d89 --- /dev/null +++ b/SPEC.md @@ -0,0 +1,709 @@ +# Obol Stack Technical Specification + +**Version**: 1.0.0-pr288 +**Status**: Living document +**Last Updated**: 2026-03-29 + +This document is the authoritative technical specification for Obol Stack on the PR `#288` integration baseline. It describes the system that is actually implemented on this branch, with future work isolated into explicit phased rollout items and ADR follow-ups. + +Primary actor priority: +- Local operator +- Agent developer +- Remote buyer + +--- + +## Table of Contents + +1. [Introduction](#1-introduction) +2. [System Architecture](#2-system-architecture) +3. [Core Subsystems](#3-core-subsystems) +4. [API / Protocol Definition](#4-api--protocol-definition) +5. 
[Data Model](#5-data-model) +6. [Integration Points](#6-integration-points) +7. [Security Model](#7-security-model) +8. [Error Handling](#8-error-handling) +9. [Performance and Operations](#9-performance-and-operations) +10. [Phased Rollout](#10-phased-rollout) +11. [Testing Strategy](#11-testing-strategy) + +--- + +## 1. Introduction + +### 1.1 Purpose + +Obol Stack is a local-first Kubernetes platform for running AI agent infrastructure, blockchain connectivity, payment-gated services, and public discovery from a single operator-controlled machine. This specification defines the expected structure and behavior of the stack as shipped on the PR `#288` branch. + +### 1.2 Scope + +The system: +- Initializes and manages a local `k3d` or `k3s` cluster from an XDG-compliant CLI. +- Deploys default infrastructure: Traefik, eRPC, LiteLLM, Cloudflare tunnel connector, monitoring, frontend, and OpenClaw. +- Lets the operator configure local and cloud model providers through a central LiteLLM gateway. +- Lets the operator install local blockchain nodes and add remote RPC upstreams to eRPC. +- Runs OpenClaw instances with embedded skills, wallet management, and an elevated default `obol-agent`. +- Sells local services through x402 payment gates and optional ERC-8004 registration. +- Buys remote x402-gated inference through a bounded-risk sidecar pattern. +- Exposes local-only and public routes with different trust boundaries. +- Installs arbitrary Helm charts as managed applications. + +The system does **not**: +- Operate as a hosted multi-tenant SaaS control plane. +- Assume public exposure is required for the core local operator path. +- Guarantee exact token metering for every pricing model in the current phase. +- Treat every chain known to eRPC or `internal/x402` as an operator-supported sell-side CLI chain. +- Replace direct Kubernetes administration for users who want bespoke cluster changes outside Obol-managed paths. 
+ +### 1.3 Personas + +| Persona | Goal | Primary Interfaces | +|--------|------|--------------------| +| Local operator | Bring up the stack, manage infra, expose services, inspect health | `obol` CLI, `http://obol.stack`, tunnel URL | +| Agent developer | Deploy and tune OpenClaw instances, skills, wallets, model routes | `obol openclaw`, `obol model`, embedded skills | +| Remote buyer | Discover and pay for a service or remote model | x402-gated HTTP endpoints, `paid/` through LiteLLM | + +### 1.4 Terminology and Glossary + +| Term | Definition | +|------|-----------| +| **Stack ID** | Petname-based identifier persisted in `$OBOL_CONFIG_DIR/.stack-id`; used for cluster identity and LiteLLM master key derivation. | +| **Backend** | Local cluster runtime: `k3d` (Docker-based) or `k3s` (bare-metal). | +| **ServiceOffer** | Namespaced CRD (`obol.org/v1alpha1`) describing a sell-side service, payment terms, route path, provenance, and registration metadata. | +| **eRPC** | In-cluster blockchain RPC gateway that multiplexes local node and public RPC upstreams. | +| **LiteLLM** | Central OpenAI-compatible model gateway in the `llm` namespace. | +| **OpenClaw instance** | A deployed AI agent runtime managed through `obol openclaw ...`. | +| **obol-agent** | The canonical default OpenClaw instance with elevated RBAC and a heartbeat-based reconciliation loop. | +| **x402-verifier** | ForwardAuth service that matches routes, emits `402 Payment Required`, and delegates verification to a facilitator. | +| **x402-buyer** | Sidecar in the LiteLLM pod that attaches pre-signed payment headers to paid upstream requests. | +| **Remote signer** | In-cluster signing service used by OpenClaw and registration flows; separate from the buyer sidecar. | +| **AGENT_BASE_URL** | Environment variable injected into the default agent deployment so generated registration documents use the current tunnel URL. 
| + +### 1.5 System Constraints + +| Constraint | Detail | +|-----------|--------| +| **Local-first execution** | The operator machine is the source of truth; cluster state, skills, wallet material, and configuration are rooted in local XDG paths. | +| **Actor priority** | The local operator path takes precedence over agent-developer ergonomics, which in turn take precedence over remote-buyer convenience. | +| **Backend exclusivity** | The stack supports exactly one active backend per config directory: `k3d` or `k3s`. Backend switching must tear down the old cluster first. | +| **Public exposure is optional** | The quick tunnel is dormant by default and only activates on sell flows unless a persistent DNS tunnel was provisioned. | +| **Chain domains are distinct** | Local installable networks, eRPC remote RPC aliases, sell-side payment chains, and ERC-8004 registration networks are related but not interchangeable. | +| **Least-public routing** | Frontend, eRPC, monitoring, and other operator surfaces are local-only under `hostnames: ["obol.stack"]`; public tunnel surfaces are intentionally narrower. | +| **Destructive cleanup is explicit** | `stack purge` preserves data by default; deleting root-owned persistent data requires `--force` and `sudo`. | +| **Phase discipline** | Future work must be recorded in explicit phase sections or ADR follow-ups, not blended into current-shipping behavior. | + +--- + +## 2. System Architecture + +### 2.1 High-Level Overview + +Obol Stack is a single-node, operator-managed platform with three concentric planes: + +1. **Control plane**: the `obol` CLI and XDG filesystem state. +2. **Cluster plane**: Traefik, LiteLLM, eRPC, OpenClaw, x402 services, frontend, monitoring, and Cloudflare tunnel connector. +3. **External plane**: Ollama and cloud LLM providers, ChainList, x402 facilitator, Cloudflare, and EVM chains used for payment or registration. 
+ +### 2.2 Module Decomposition + +| Module | Purpose | Key Dependencies | +|--------|---------|-----------------| +| `cmd/obol` | User-facing CLI surface | `internal/*`, `urfave/cli/v3` | +| `internal/stack` | Stack init/up/down/purge, backend management, default infra sync | `internal/embed`, `internal/model`, `internal/openclaw`, `internal/agent`, `internal/tunnel` | +| `internal/model` | LiteLLM provider configuration and model synchronization | Kubernetes ConfigMaps/Secrets, Ollama, cloud APIs | +| `internal/network` | Local node deployment and eRPC remote upstream management | Embedded network charts, ChainList, eRPC ConfigMap | +| `internal/openclaw` | Instance onboarding, overlays, dashboard, token, skills, wallet flows | Helmfile, embedded skills, DNS, LiteLLM | +| `internal/agent` | Elevates the default OpenClaw instance with monetization RBAC and heartbeat behavior | Kubernetes RBAC, local data volume | +| `internal/x402` | Sell-side verifier, pricing config, watcher, setup, metrics | x402 facilitator, Traefik ForwardAuth | +| `internal/x402/buyer` | Buy-side paid upstream proxy with pre-signed auth pools | LiteLLM, remote sellers, ConfigMaps | +| `internal/erc8004` | Registration clients, network registry, types, signer integration | eRPC, registry contracts, remote signer | +| `internal/tunnel` | Quick and DNS Cloudflare tunnel lifecycle | cloudflared, Cloudflare APIs, frontend ConfigMap | +| `internal/app` | Managed application install/sync/list/delete | ArtifactHub, OCI/HTTP charts, Helmfile | + +### 2.3 Critical Lifecycles + +#### 2.3.1 Operator Startup Lifecycle + +1. `obol stack init` creates config directories, chooses a backend, writes `.stack-id` and `.stack-backend`, and materializes default infrastructure templates. +2. `obol stack up` starts the local cluster and writes `kubeconfig.yaml`. +3. `syncDefaults()` deploys baseline infrastructure via Helmfile. +4. 
`autoConfigureLLM()` patches LiteLLM for detected Ollama models and imported cloud credentials. +5. `openclaw.SetupDefault()` creates or re-syncs the default `obol-agent` instance. +6. `agent.Init()` patches monetization RBAC and injects `HEARTBEAT.md`. +7. DNS is configured for `obol.stack` and, if provisioned, a persistent tunnel is started. + +#### 2.3.2 Sell-Side Lifecycle + +1. The operator creates a sell surface using `obol sell http ...` or `obol sell inference ...`. +2. A `ServiceOffer` CR or host-side gateway deployment is created and persisted. +3. The `monetize.py` reconciler evaluates the offer through `ModelReady`, `UpstreamHealthy`, `PaymentGateReady`, `RoutePublished`, `Registered`, and `Ready`. +4. Traefik routes public traffic through x402 ForwardAuth. +5. If registration is enabled, `/.well-known/agent-registration.json` is published and on-chain registration is attempted. + +#### 2.3.3 Buy-Side Lifecycle + +1. The agent probes a seller to read its 402 pricing response. +2. The agent pre-signs a bounded batch of ERC-3009 authorizations through the remote signer. +3. Buyer config and auth pools are stored in `llm` namespace ConfigMaps. +4. LiteLLM receives a request for `paid/` and forwards to the local sidecar. +5. The sidecar retries the upstream request with `X-PAYMENT`, consumes one auth, and tracks remaining budget. + +--- + +## 3. Core Subsystems + +### 3.1 Stack Lifecycle + +#### 3.1.1 Purpose + +Provide a single CLI-managed entry point for provisioning, starting, stopping, and destroying the full local stack. + +#### 3.1.2 Inputs and Outputs + +Inputs: +- XDG or `OBOL_*` path environment variables. +- Backend selection (`k3d` or `k3s`). +- Local prerequisites such as Docker or local filesystem access. + +Outputs: +- Stack config under `$OBOL_CONFIG_DIR`. +- Persistent data under `$OBOL_DATA_DIR`. +- Runtime state under `$OBOL_STATE_DIR`. +- A working kubeconfig and running cluster. 
+
+#### 3.1.3 Startup Sequence
+
+`stack up` is intentionally opinionated:
+- Start backend.
+- Write kubeconfig.
+- Run Helmfile over embedded defaults.
+- Auto-configure LiteLLM.
+- Create or refresh default OpenClaw.
+- Apply agent capabilities.
+- Configure local DNS.
+- Start tunnel only when persistent DNS state already exists.
+
+A Helmfile failure is treated as fatal and triggers an automatic `stack down` cleanup path.
+
+#### 3.1.4 Shutdown and Purge
+
+- `stack down` stops the cluster and DNS helper but preserves config and data.
+- `stack purge` destroys cluster state and removes config.
+- `stack purge --force` additionally removes persistent data and prompts for wallet backup before destruction.
+
+### 3.2 LLM Routing and Provider Management
+
+#### 3.2.1 Purpose
+
+Centralize model routing through one OpenAI-compatible gateway so OpenClaw instances and paid model paths use a single runtime interface.
+
+#### 3.2.2 Provider Model
+
+Supported provider classes on this branch:
+- `ollama`
+- `anthropic`
+- `openai`
+- custom OpenAI-compatible endpoints
+
+Key properties:
+- LiteLLM config lives in `litellm-config` ConfigMap in namespace `llm`.
+- Provider secrets live in `litellm-secrets`.
+- Auto-discovery during `stack up` is best-effort, not required for later manual setup.
+- After provider changes, configured models are synchronized back into OpenClaw overlays to avoid route drift.
+
+#### 3.2.3 Static Paid Namespace
+
+The buy-side path is intentionally static:
+- Public model names are always `paid/<model>`.
+- LiteLLM keeps a permanent wildcard route.
+- Purchased model changes update buyer ConfigMaps, not LiteLLM topology.
+
+This keeps the payment path isolated from the rest of model routing. 
+
+### 3.3 Network Management and eRPC
+
+#### 3.3.1 Chain Domains
+
+Obol Stack uses four separate chain domains:
+
+| Domain | Current Source of Truth | Examples |
+|-------|--------------------------|----------|
+| Local installable networks | `internal/embed/networks/` | `ethereum`, `aztec` |
+| eRPC remote RPC aliases | `internal/network/chainlist.go` | `base`, `mainnet`, `polygon`, `avalanche`, `hoodi` |
+| Sell-side payment chains | `cmd/obol/sell.go` | `base-sepolia`, `base`, `ethereum` |
+| ERC-8004 registration chains | `internal/erc8004/networks.go` | `base-sepolia`, `base`, `ethereum` |
+
+Documentation and behavior must not collapse these into a single “supported networks” statement.
+
+#### 3.3.2 Local Networks
+
+Local installable networks are embedded Helmfile/chart bundles. On this branch:
+- `ethereum`
+- `aztec`
+
+`obol network install <network>` renders `values.yaml` from annotated templates, copies the network bundle into `$OBOL_CONFIG_DIR/networks/<network>/<name>/`, and waits for explicit `network sync` to deploy it.
+
+#### 3.3.3 Remote RPC Networks
+
+`obol network add <chain>` uses ChainList to fetch public HTTPS RPCs, filters and ranks them, and writes them into eRPC configuration. By default:
+- only free/public HTTPS endpoints are accepted
+- full-tracking endpoints are rejected
+- write methods remain blocked
+
+`network remove` removes ChainList-sourced upstreams for a chain without touching local node upstreams or custom endpoints.
+
+#### 3.3.4 Route Exposure
+
+eRPC is exposed locally at `http://obol.stack/rpc` behind Traefik. Traffic is still passed through the x402 middleware path, but the verifier returns `200` for unmatched routes or routes with no active pricing rule.
+
+### 3.4 OpenClaw Runtime and Agent Capabilities
+
+#### 3.4.1 Purpose
+
+Manage AI agent instances as first-class stack workloads with operator-controlled overlays, credentials, skills, and wallets. 
+ +#### 3.4.2 Instance Model + +OpenClaw instances are stored under: +- `$OBOL_CONFIG_DIR/applications/openclaw//` + +Each instance has: +- Helmfile deployment metadata +- Obol overlay values +- optional imported provider/channel settings +- skill injection into persistent volume paths + +The canonical default instance is `obol-agent`. It is re-synced idempotently by `stack up`. + +#### 3.4.3 Agent Elevation + +`agent.Init()` does not create a separate controller binary. Instead it: +- patches monetization ClusterRoleBindings and a pricing RoleBinding +- injects `HEARTBEAT.md` into the default agent workspace so heartbeat cycles run `monetize.py process --all --quick` + +This makes monetization behavior part of the default agent runtime, not a parallel control plane. + +#### 3.4.4 Operator Surfaces + +Key instance operations: +- onboard or scaffold +- sync +- retrieve or regenerate gateway token +- open dashboard +- manage skills +- backup or restore wallet material +- shell out to the embedded OpenClaw CLI + +### 3.5 Sell-Side Monetization + +#### 3.5.1 Purpose + +Expose local services through x402 payment gates and optional ERC-8004 public discovery without requiring a separate Kubernetes operator binary. + +#### 3.5.2 Operator Commands + +Current sell-side CLI surface: +- `sell inference` +- `sell http` +- `sell list` +- `sell status` +- `sell probe` +- `sell stop` +- `sell delete` +- `sell pricing` +- `sell register` + +#### 3.5.3 ServiceOffer CRD + +`ServiceOffer` is the declarative contract for sell-side workloads. Required fields are: +- `spec.upstream` +- `spec.payment` + +Optional but meaningful fields are: +- `spec.type` +- `spec.model` +- `spec.provenance` +- `spec.path` +- `spec.registration` + +Status includes: +- `conditions[]` +- `endpoint` +- `agentId` +- `registrationTxHash` + +#### 3.5.4 Reconciliation Stages + +The current skill-driven reconcile loop uses these stages: +1. `ModelReady` +2. `UpstreamHealthy` +3. `PaymentGateReady` +4. 
`RoutePublished` +5. `Registered` +6. `Ready` + +Registration is intentionally degradable. If the signer, RPC path, or gas funding is unavailable, the service can remain public and payment-gated with `Registered=True` and reason `OffChainOnly`. + +#### 3.5.5 Pricing Models + +Current pricing models on this branch: +- `perRequest` +- `perMTok` +- `perHour` +- `perEpoch` in schema, but not a first-class operator flow yet + +Current enforcement reality: +- `perRequest` is direct +- `perMTok` is approximated to a request price using `1000` tokens per request +- `perHour` is approximated to a request price using `5` minutes per request in the current monetization skill + +These approximations are current implementation behavior, not future exact-metering guarantees. + +#### 3.5.6 Standalone Inference Gateway + +`sell inference` supports two related paths: +- standalone host-side x402-gated gateway +- cluster-aware mode, where a host-side gateway is wrapped by a `ServiceOffer` and cluster routing + +Optional attestation-related inputs already exist on this branch: +- macOS Secure Enclave +- Linux TEE backends +- provenance metadata for experiment output + +### 3.6 Buy-Side Remote Inference + +#### 3.6.1 Purpose + +Allow agents to pay for remote x402-gated inference without giving the runtime access to live signing keys. 
+
+#### 3.6.2 Design
+
+The buy-side path uses:
+- a pre-signing step through the remote signer
+- `x402-buyer-config` ConfigMap
+- `x402-buyer-auths` ConfigMap
+- an `x402-buyer` sidecar in the LiteLLM pod
+- a static public model namespace `paid/<model>`
+
+Runtime properties:
+- zero signer access in the sidecar
+- bounded spending equal to remaining auth count times unit price
+- OpenAI-compatible reverse proxy interfaces
+- `/healthz`, `/status`, and `/metrics` endpoints
+
+### 3.7 Tunnel, Discovery, Frontend, and Monitoring
+
+#### 3.7.1 Tunnel Modes
+
+Current tunnel modes:
+- `quick`: dormant until a sell flow requires public exposure
+- `dns`: persistent hostname-based tunnel created via browser login or API-token provisioning
+
+When a tunnel URL becomes available, the stack updates:
+- `AGENT_BASE_URL` on the default agent deployment
+- the frontend configuration ConfigMap
+
+#### 3.7.2 Public vs Local Routes
+
+Local-only operator routes:
+- `http://obol.stack/`
+- `http://obol.stack/rpc`
+- monitoring and internal admin surfaces via hostname restriction
+
+Public tunnel routes:
+- `/services/<name>/...`
+- `/.well-known/agent-registration.json`
+- storefront and machine-readable service catalog surfaces
+
+#### 3.7.3 Frontend and Monitoring
+
+The stack ships:
+- `obol-frontend` namespace for the dashboard
+- `monitoring` namespace with kube-prometheus-stack
+- a PodMonitor for the buyer sidecar
+
+The frontend is allowed to discover namespaces, pods, ConfigMaps, Secrets, and `ServiceOffer` resources through an explicit ClusterRoleBinding.
+
+### 3.8 Application Management and Supporting Operations
+
+#### 3.8.1 Managed Applications
+
+`obol app install/sync/list/delete` lets operators treat arbitrary Helm charts as managed workloads under `$OBOL_CONFIG_DIR/applications/<type>/<name>/`. 
+ +Supported chart references: +- `repo/chart` +- `repo/chart@version` +- `https://.../*.tgz` +- `oci://...` + +#### 3.8.2 Supporting Operations + +The branch also includes: +- update and upgrade commands +- flow scripts validating sell and buy paths +- optional subprojects such as `reth-erc8004-indexer` +- embedded skills for autoresearch-related workloads + +These supporting operations are part of the repository surface, but not all of them are yet first-class operator workflows. + +--- + +## 4. API / Protocol Definition + +### 4.1 CLI Surface + +| Surface | Current Commands | +|--------|-------------------| +| Stack | `stack init`, `stack up`, `stack down`, `stack purge` | +| Agent | `agent init` | +| Models | `model setup`, `model status`, `model sync`, `model pull`, `model list`, `model remove` | +| Networks | `network list`, `network install`, `network sync`, `network delete`, `network add`, `network remove`, `network status` | +| OpenClaw | `openclaw onboard`, `sync`, `token`, `list`, `delete`, `setup`, `dashboard`, `skills`, `wallet`, `cli` | +| Sell | `sell inference`, `http`, `list`, `status`, `probe`, `stop`, `delete`, `pricing`, `register` | +| Tunnel | `tunnel status`, `login`, `provision`, `restart`, `stop`, `logs` | +| Apps | `app install`, `sync`, `list`, `delete` | +| Operations | `update`, `upgrade`, `version`, Kubernetes passthrough commands | + +### 4.2 Kubernetes API and CRDs + +| Interface | Kind | Purpose | +|----------|------|---------| +| `obol.org/v1alpha1` | `ServiceOffer` | Declares sell-side services, pricing, provenance, and registration metadata | +| `gateway.networking.k8s.io/v1` | `HTTPRoute` | Exposes frontend, eRPC, public services, and registration document routes | +| `traefik.io/v1alpha1` | `Middleware` | ForwardAuth integration for x402 payment checks | +| `monitoring.coreos.com/v1` | `PodMonitor` | Scrapes buyer sidecar metrics | + +### 4.3 HTTP and Routing Surfaces + +| Surface | Location | Audience | Notes | 
+|--------|----------|----------|------|
+| Frontend | `http://obol.stack/` | Local operator | Local-only hostname restriction |
+| eRPC | `http://obol.stack/rpc` | Local operator, agent workloads | Route goes through Traefik middleware path |
+| Public service routes | `https://<tunnel-host>/services/<name>/...` | Remote buyer | x402-gated |
+| Registration document | `https://<tunnel-host>/.well-known/agent-registration.json` | Discovery clients | Public, no ForwardAuth |
+| Buyer sidecar health | `http://127.0.0.1:8402/healthz` | In-cluster | Sidecar-local |
+| Buyer sidecar status | `http://127.0.0.1:8402/status` | In-cluster | Sidecar-local |
+| Buyer metrics | `/metrics` on buyer sidecar | Monitoring | Scraped by PodMonitor |
+
+### 4.4 Authentication and Authorization
+
+- OpenClaw dashboard and API access use a per-instance gateway token retrievable from `obol openclaw token`.
+- Public sell-side routes rely on x402 payment verification rather than a user session.
+- Kubernetes mutating actions are performed through local operator credentials or specific service accounts with explicit RBAC.
+- The buyer sidecar authenticates payments with pre-signed vouchers, not a live signer.
+
+### 4.5 Rate Limiting and Quotas
+
+There is no global user quota service in the current branch. Effective limits are:
+- finite pre-signed auth pools on the buy side
+- route-level pricing configured in x402 verifier
+- workload capacity imposed by local cluster resources and upstream services
+
+---
+
+## 5. 
Data Model + +### 5.1 Filesystem Layout + +| Path | Purpose | +|------|---------| +| `$OBOL_CONFIG_DIR/.stack-id` | Persistent stack identity | +| `$OBOL_CONFIG_DIR/.stack-backend` | Active backend selection | +| `$OBOL_CONFIG_DIR/kubeconfig.yaml` | Cluster access for passthrough tools and CLI operations | +| `$OBOL_CONFIG_DIR/defaults/` | Rendered default infrastructure bundle | +| `$OBOL_CONFIG_DIR/networks///` | Local network deployment config | +| `$OBOL_CONFIG_DIR/applications///` | Managed application or OpenClaw instance config | +| `$OBOL_DATA_DIR/` | Persistent volumes, wallet data, OpenClaw workspaces | +| `$OBOL_STATE_DIR/` | Runtime logs and mutable state | + +### 5.2 Kubernetes Namespaces and Core Resources + +| Namespace | Core Resources | Purpose | +|----------|----------------|---------| +| `traefik` | Traefik, cloudflared, gateway | Ingress and tunnel connector | +| `llm` | LiteLLM, x402-buyer sidecar, buyer PodMonitor | Model gateway and buy-side runtime | +| `erpc` | eRPC, HTTPRoute, metadata ConfigMap | Blockchain RPC gateway | +| `x402` | x402-verifier, pricing config | Sell-side payment verification | +| `openclaw-obol-agent` | Default OpenClaw agent, remote signer | Canonical agent runtime | +| `openclaw-` | Additional OpenClaw instances | User-managed agent runtimes | +| `obol-frontend` | Frontend deployment, HTTPRoute, RBAC | Dashboard | +| `monitoring` | Prometheus stack | Metrics and observability | +| `reloader` | Stakater Reloader | Config/secret-triggered restarts | + +### 5.3 Key ConfigMaps, Secrets, and Documents + +| Object | Purpose | +|-------|---------| +| `litellm-config` | Model routing table for LiteLLM | +| `litellm-secrets` | Cloud provider API keys | +| `erpc-config` | eRPC upstream and network definitions | +| `obol-stack-config` | Frontend-readable stack metadata, including tunnel URL | +| `x402-pricing` | Sell-side route pricing for the verifier | +| `x402-buyer-config` | Buy-side upstream mapping | +| `x402-buyer-auths` 
| Pre-signed authorization pools |
+| `cloudflared-tunnel-token` | DNS tunnel token for persistent Cloudflare tunnel |
+| `so-<name>-registration` ConfigMap | Generated `agent-registration.json` for a ServiceOffer |
+
+### 5.4 Data Lifecycle
+
+- Stack identity and backend selection are created at `stack init` and persist until purge.
+- Local network and application configs are created before cluster deployment and reused across syncs.
+- Wallet material persists across `stack down` and ordinary deletes; explicit backup and restore flows exist for OpenClaw wallets.
+- Buy-side auth pools are consumed monotonically and must be refilled.
+- Registration JSON is regenerated when a ServiceOffer changes or registration status changes.
+
+---
+
+## 6. Integration Points
+
+| System | Protocol | Purpose | Failure Mode |
+|--------|----------|---------|-------------|
+| Ollama | HTTP | Local model serving and discovery | Auto-config skips or later requests fail until operator configures a provider |
+| Anthropic / OpenAI | HTTPS | Cloud model routing through LiteLLM | Provider remains unavailable; other routes continue |
+| ChainList | HTTPS | Public RPC discovery for eRPC | `network add` fails or requires custom endpoint |
+| x402 facilitator | HTTPS | Payment verification and settlement | Sell-side requests fail verification or operator runs verify-only/local test paths |
+| Cloudflare | Browser auth, API, tunnel transport | Public exposure | Stack remains locally usable without tunnel |
+| EVM chains via eRPC | JSON-RPC | Payments, registration, discovery queries | Registration degrades to off-chain or buyer/seller requests fail upstream |
+| ArtifactHub / chart repos / OCI | HTTPS | Managed app installation | App install fails; core stack remains unaffected |
+
+---
+
+## 7. 
Security Model + +### 7.1 Threat Model + +Primary threats: +- accidental public exposure of operator-only routes +- live signing key exposure to runtime components +- unintended mainnet write forwarding +- silent documentation drift that misstates operator guarantees +- orphaned or half-started infrastructure after failed deploys + +### 7.2 Wallet and Signing Boundaries + +- The buyer sidecar has no live signer access. +- Registration and other signing flows prefer the remote signer and may fall back to a private key file only when explicitly invoked. +- Secure Enclave and Linux TEE support exist for standalone inference paths, but are optional. + +### 7.3 Public Exposure Guardrails + +- Frontend, eRPC, monitoring, and similar operator surfaces are local-only under `obol.stack`. +- Public tunnel routes are intentionally narrower and centered on payment-gated services and discovery metadata. +- Quick tunnels are not started eagerly on `stack up`. + +### 7.4 Payment Trust Model + +- x402 payment proofs are verified through a facilitator. +- Non-HTTPS facilitator URLs are rejected except for loopback and container-internal development hosts. +- Route matching is explicit; unmatched routes pass through without payment requirements. + +### 7.5 RBAC Model + +- Default agent elevation is explicit and applied by `agent.Init()`. +- Frontend has a narrow but meaningful ClusterRole for discovery and `ServiceOffer` CRUD. +- Sell-side resource cleanup relies on namespaced ownership and cluster-scoped permissions where required. + +--- + +## 8. 
Error Handling + +### 8.1 Error Categories + +| Category | Example | Handling | +|----------|---------|---------| +| Prerequisite failure | backend missing, cluster not running | CLI exits non-zero with remediation hint | +| Partial deployment failure | Helmfile sync fails | stack auto-runs cleanup path | +| Unsupported chain-domain input | using an eRPC-only alias in a sell-side command | command fails with supported chain list | +| Upstream health failure | `ServiceOffer` upstream is unhealthy | reconcile stops before route publish | +| Registration failure | signer unavailable or wallet unfunded | degrade to `OffChainOnly` where supported | +| Buyer budget exhaustion | no remaining auths for `paid/<model>` | request path fails until refill | +| Tunnel unavailability | quick or DNS tunnel cannot start | local stack remains usable; public path degraded | + +### 8.2 Error Response Contracts + +- CLI failures are non-zero exits with human-readable hints. +- x402 verifier emits HTTP `402 Payment Required` with pricing metadata. +- Buy-side proxy returns HTTP `404` when no purchased upstream matches the requested `paid/<model>`. +- Registration degradation is recorded in `ServiceOffer.status.conditions`. + +### 8.3 Retry and Recovery + +- Model and provider configuration can be re-run safely. +- Network sync and app sync are explicit operator actions. +- Buyer auth pools can be refilled without rebuilding LiteLLM topology. +- Tunnel restarts are explicit and cheap for quick tunnels. + +--- + +## 9. 
Performance and Operations + +### 9.1 Operational Bounds + +| Metric | Current Bound | Measurement | +|--------|---------------|-------------| +| ChainList fetch timeout | 15 seconds | `internal/network/chainlist.go` timeout | +| Tunnel rollout wait | 30 seconds | `tunnel.EnsureRunning()` rollout status | +| LiteLLM rollout wait | 90 seconds | `model.RestartLiteLLM()` rollout status | +| Buyer metrics scrape interval | 30 seconds | PodMonitor definition | +| `perMTok` approximation | 1000 tokens/request | monetization skill constant | +| `perHour` approximation | 5 minutes/request | monetization skill constant | + +### 9.2 Observability + +- Prometheus stack is part of the default infrastructure. +- Buyer sidecar exports metrics for auth pools and payment attempts. +- Tunnel status, OpenClaw token flows, and sell status all have dedicated CLI surfaces. + +--- + +## 10. Phased Rollout + +### Phase 1: PR288 Baseline + +- Local-first stack lifecycle with `k3d` and `k3s` +- Default infrastructure deployment through Helmfile +- LiteLLM as the central model gateway +- eRPC local and remote RPC management +- OpenClaw instance lifecycle and the elevated `obol-agent` +- Sell-side x402 routes, `ServiceOffer` reconcile loop, `sell probe`, and optional ERC-8004 registration +- Buy-side `paid/*` remote inference path with bounded-risk sidecar +- Quick and DNS tunnel modes +- Local frontend and monitoring stack +- Managed application install/sync/list/delete + +### Phase 2: Explicit Follow-Ups + +- Replace approximation-based pricing for `perMTok` and `perHour` with exact metering where supported. +- Add operator-safe JSON, headless, and introspection surfaces to the CLI before promoting broader agent or MCP control paths. +- Package `reth-erc8004-indexer` as a first-class managed application instead of a repository-adjacent subproject. 
+- Promote autoresearch worker and coordinator workflows from skill-level building blocks into operator-visible flows with clearer provenance surfaces. +- Tighten reconcile and heartbeat latency rather than relying on the current default cadence. +- Extend and document multi-chain sell-side support only when the CLI, verifier, and registration surfaces agree on the contract. +- Extend monetized publication beyond the current inference-centric path only after explicit isolation, ownership, and routing rules are specified. +- Validate the buy-side path more deeply through LiteLLM-routed hands-off tests and in-pod skill smoke coverage. +- Enforce canonical spec drift checks through Codex hooks and CI. + +--- + +## 11. Testing Strategy + +### 11.1 Test Levels + +| Level | Tooling | What It Covers | +|-------|---------|----------------| +| Unit | `go test ./...` | Core package logic, serializers, matchers, config handling | +| Integration | package-level integration tests | Kubernetes-backed paths, OpenClaw flows, x402 verifier paths | +| Flow / E2E | `flows/flow-06`, `07`, `08`, `10` | Sell setup, verify, buy path, anvil facilitator loop | +| Skill smoke | `tests/skills_smoke_test.py`, focused Python tests | Embedded skill assets and runtime contracts | +| BDD spec | `features/*.feature` plus existing executable features | Behavioral contract for current and future implementation | + +### 11.2 Test Data Strategy + +- Use deterministic local stack IDs and local config dirs in tests where possible. +- Prefer fixture-based ChainList data for RPC selection tests. +- Treat real public tunnel URLs, facilitator endpoints, and on-chain registration as integration concerns, not unit-test assumptions. + +### 11.3 CI/CD Integration + +- Code changes that alter the operator, agent, buyer, seller, tunnel, or routing contract must update this root-level bundle. +- Operator guides in `docs/guides/` may remain for context, but they are not authoritative once this bundle exists. 
diff --git a/docs/adr/0001-local-first-stack-runtime.md b/docs/adr/0001-local-first-stack-runtime.md new file mode 100644 index 00000000..ffa8f3bc --- /dev/null +++ b/docs/adr/0001-local-first-stack-runtime.md @@ -0,0 +1,20 @@ +# ADR-0001: Local-First Stack Runtime + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 1.3, 3.1, 5.1, 6, 7.3 + +## Context + +Obol Stack serves operators running a full agent platform from their own machine. The system needs reproducible local cluster lifecycle control, predictable filesystem ownership, and a recovery path that does not depend on remote control planes. + +## Decision + +The stack remains local-first. The operator machine owns config, binaries, and persistent data, while `k3d` and `k3s` are the supported backend runtime options exposed through one `obol stack` lifecycle. Public exposure is optional and layered on top of a usable local baseline rather than required for startup. + +## Consequences + +- **Positive**: Startup, recovery, and inspection flows stay operator-centric and easier to reason about. +- **Negative**: Some cloud-native assumptions, such as always-on public endpoints or remote state stores, are intentionally deprioritized. +- **Neutral**: Future hosted or multi-node modes must be expressed as new phases rather than silently widening the local-first contract. diff --git a/docs/adr/0002-central-litellm-gateway.md b/docs/adr/0002-central-litellm-gateway.md new file mode 100644 index 00000000..e4c5fdad --- /dev/null +++ b/docs/adr/0002-central-litellm-gateway.md @@ -0,0 +1,20 @@ +# ADR-0002: Central LiteLLM Gateway + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 2.3, 3.2, 4.3, 7.1 + +## Context + +The stack needs one consistent model routing surface for local Ollama models, cloud APIs, and paid remote models. Per-instance provider wiring leads to duplicated credentials, stale model lists, and inconsistent behavior across agents. 
+ +## Decision + +LiteLLM is the central cluster-wide model gateway. OpenClaw instances and operator flows route through LiteLLM for normal model access, while provider credentials and static paid-route configuration remain centralized in the `llm` namespace. + +## Consequences + +- **Positive**: Model routing becomes uniform across operator, agent, and buyer paths. +- **Negative**: LiteLLM readiness becomes a critical dependency for most inference surfaces. +- **Neutral**: Direct-to-provider experiments remain possible, but they are exceptions to the main platform contract rather than the default architecture. diff --git a/docs/adr/0003-distinct-network-domains.md b/docs/adr/0003-distinct-network-domains.md new file mode 100644 index 00000000..afef3990 --- /dev/null +++ b/docs/adr/0003-distinct-network-domains.md @@ -0,0 +1,20 @@ +# ADR-0003: Distinct Network Domains + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 3.3, 3.5, 3.6, 4.2 + +## Context + +The platform touches several network concepts that look similar but are not interchangeable: installable local networks, remote RPC aliases, sell-side payment chains, and ERC-8004 registration networks. Previous spec work blurred those domains and created false support claims. + +## Decision + +The spec and CLI contract must keep these network domains separate. A chain appearing in one subsystem, such as the low-level x402 resolver, does not automatically expand support claims for other subsystems. Multi-chain sell-side support may only be documented once the CLI, payment verifier, and registration surfaces agree on the same contract. + +## Consequences + +- **Positive**: Support claims stay factual and users can tell which network surface they are configuring. +- **Negative**: Documentation is less compact because one generic “supported networks” list is intentionally avoided. 
+- **Neutral**: Future multi-chain expansion requires aligned implementation work across several modules before the spec can widen the contract. diff --git a/docs/adr/0004-openclaw-elevated-agent-runtime.md b/docs/adr/0004-openclaw-elevated-agent-runtime.md new file mode 100644 index 00000000..87d40dc1 --- /dev/null +++ b/docs/adr/0004-openclaw-elevated-agent-runtime.md @@ -0,0 +1,20 @@ +# ADR-0004: OpenClaw as the Elevated Agent Runtime + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 3.4, 5.2, 7.5 + +## Context + +Obol Stack needs an automation runtime that can operate inside the cluster, consume embedded skills, and act on behalf of the operator for selected workflows. Building a separate controller family for every automation path would fragment the control model. + +## Decision + +The default elevated automation runtime is an OpenClaw deployment, `obol-agent`, with carefully scoped elevated permissions and embedded skills. Additional OpenClaw instances remain operator-managed deployments and do not inherit the same elevated role automatically. + +## Consequences + +- **Positive**: The platform reuses one agent runtime model for operator workflows and skill execution. +- **Negative**: Elevated RBAC and skill distribution must be reviewed carefully because the default agent has broader authority than ordinary instances. +- **Neutral**: New autonomous behaviors should first be expressed as skills against this runtime before introducing dedicated controllers. 
diff --git a/docs/adr/0005-serviceoffer-skill-reconcile-loop.md b/docs/adr/0005-serviceoffer-skill-reconcile-loop.md new file mode 100644 index 00000000..9eacb480 --- /dev/null +++ b/docs/adr/0005-serviceoffer-skill-reconcile-loop.md @@ -0,0 +1,20 @@ +# ADR-0005: ServiceOffer-Driven Sell-Side Reconcile Loop + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 3.5, 4.2, 5.3, 8 + +## Context + +Sell-side publication needs declarative state, observable status, and reconciliation across Kubernetes routing, pricing, and optional registration. Prior proposals considered separate controllers or looser imperative flows. + +## Decision + +Sell-side publication is driven by the `ServiceOffer` custom resource and reconciled by the elevated agent's monetize skill. The reconcile loop advances through explicit stages that cover model readiness, upstream health, payment gate setup, route publication, optional registration, and final readiness. + +## Consequences + +- **Positive**: Operators get one declarative resource and one status model for sell-side lifecycle. +- **Negative**: Reconcile latency is bounded by the agent heartbeat cadence rather than a dedicated controller loop. +- **Neutral**: Future generalized agent-authored services should extend this pattern only if they preserve explicit ownership, isolation, and stage visibility. diff --git a/docs/adr/0006-static-paid-namespace-buyer-sidecar.md b/docs/adr/0006-static-paid-namespace-buyer-sidecar.md new file mode 100644 index 00000000..0e05e8df --- /dev/null +++ b/docs/adr/0006-static-paid-namespace-buyer-sidecar.md @@ -0,0 +1,20 @@ +# ADR-0006: Static Paid Namespace with a Bounded-Risk Buyer Sidecar + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 3.2.3, 3.6, 7.2, 7.4 + +## Context + +Remote paid inference needs a stable buyer-facing model contract, but giving the request path direct access to live signing authority would create a large security and spend risk. 
+ +## Decision + +Paid remote models are exposed through a static `paid/*` namespace at LiteLLM and fulfilled by a buyer sidecar that holds only a bounded pool of pre-signed authorizations. The sidecar handles payment retries and forwarding without receiving live signer authority. + +## Consequences + +- **Positive**: The buyer path is easier to integrate and materially safer than a live-signing proxy. +- **Negative**: Capacity is limited by the pre-signed auth pool and requires replenishment workflows. +- **Neutral**: Observability for auth exhaustion and payment retries becomes a first-class operational concern. diff --git a/docs/adr/0007-local-only-operator-surfaces-with-optional-public-discovery.md b/docs/adr/0007-local-only-operator-surfaces-with-optional-public-discovery.md new file mode 100644 index 00000000..79cb9dc2 --- /dev/null +++ b/docs/adr/0007-local-only-operator-surfaces-with-optional-public-discovery.md @@ -0,0 +1,20 @@ +# ADR-0007: Local-Only Operator Surfaces with Optional Public Discovery + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 3.7, 4.3, 7.3 + +## Context + +The stack needs public reachability for paid services and optional discovery, but it also exposes sensitive operator surfaces such as the frontend, eRPC gateway, and monitoring. + +## Decision + +Operator surfaces remain local-only by default. Tunnel exposure is scoped to the routes that explicitly need it, and public discovery metadata follows the current tunnel address rather than widening local control-plane surfaces. + +## Consequences + +- **Positive**: Public monetization and discovery can coexist with conservative operator safety boundaries. +- **Negative**: Public operator dashboards and remote admin UX remain out of scope for the current contract. +- **Neutral**: If public operator surfaces are ever introduced, they require an explicit architectural change rather than an incremental tunnel tweak. 
diff --git a/docs/adr/0008-canonical-root-spec-bundle-and-codex-hooks.md b/docs/adr/0008-canonical-root-spec-bundle-and-codex-hooks.md new file mode 100644 index 00000000..ace317a6 --- /dev/null +++ b/docs/adr/0008-canonical-root-spec-bundle-and-codex-hooks.md @@ -0,0 +1,20 @@ +# ADR-0008: Canonical Root-Level Spec Bundle with Codex Hook Guardrails + +**Date**: 2026-03-29 +**Status**: Accepted + +**Impacts**: SPEC Sections 10, 11.3 and CONTRIBUTING.md + +## Context + +The repository accumulated parallel plan files, stale design notes, and an incorrect `docs/specs/` bundle that drifted from both the code and the original backend-service-spec-bundler design. The project needed one canonical spec location and a lightweight mechanism to catch future drift during development. + +## Decision + +The repository follows the original backend-service-spec-bundler layout at repo root: `SPEC.md`, `ARCHITECTURE.md`, `BEHAVIORS_AND_EXPECTATIONS.md`, `CONTRIBUTING.md`, `features/`, and `docs/adr/`. Codex hooks are added as guardrails to remind the model of these conventions and to flag spec-impacting code changes when the canonical bundle was not updated in the same turn. + +## Consequences + +- **Positive**: The bundle has one authoritative location and drift becomes easier to detect. +- **Negative**: Contributors must maintain the canonical docs alongside behavior changes instead of relying on scattered planning notes. +- **Neutral**: Hooks assist developer workflow, but CI and human review still remain the final enforcement layer. 
diff --git a/docs/adr/0009-phase-2-exact-metering-after-pre-request-gate.md b/docs/adr/0009-phase-2-exact-metering-after-pre-request-gate.md new file mode 100644 index 00000000..2699a5cb --- /dev/null +++ b/docs/adr/0009-phase-2-exact-metering-after-pre-request-gate.md @@ -0,0 +1,20 @@ +# ADR-0009: Phase 2 Exact Metering After the Pre-Request Payment Gate + +**Date**: 2026-03-29 +**Status**: Proposed + +**Impacts**: SPEC Sections 3.5.5, 4.5, 10 + +## Context + +The PR288 baseline supports `perMTok` and `perHour` pricing, but current enforcement relies on approximation before execution. The platform needs a clearer future direction for exact post-response accounting without discarding the existing pre-request payment gate. + +## Decision + +Phase 2 exact metering, where implemented, should augment the current pre-request payment gate rather than replace it. Authorization remains the entry check, while measured usage becomes a post-response accounting and observability concern for supported protocols. + +## Consequences + +- **Positive**: The current gatekeeping model remains intact while exact accounting improves fidelity where it is technically feasible. +- **Negative**: The platform must operate two related billing surfaces during transition: pre-request authorization and post-response accounting. +- **Neutral**: Streaming and non-OpenAI-compatible formats may continue to use approximation until a stronger metering contract exists. diff --git a/docs/adr/0010-phase-2-agent-ready-cli-surfaces.md b/docs/adr/0010-phase-2-agent-ready-cli-surfaces.md new file mode 100644 index 00000000..747eaa95 --- /dev/null +++ b/docs/adr/0010-phase-2-agent-ready-cli-surfaces.md @@ -0,0 +1,20 @@ +# ADR-0010: Phase 2 Agent-Ready CLI Surfaces + +**Date**: 2026-03-29 +**Status**: Proposed + +**Impacts**: SPEC Sections 1.3, 4.1, 10 + +## Context + +The platform is increasingly consumed by agents as well as human operators. 
Human-first CLI ergonomics are still primary, but the repository also contains future-work notes for structured JSON output, headless prompt handling, and richer introspection. + +## Decision + +Phase 2 agent-facing improvements should add structured output, non-interactive input paths, and machine-friendly introspection without replacing the human-first operator contract. The local operator remains the primary actor, so agent-ready surfaces are an extension of the CLI rather than a separate control plane by default. + +## Consequences + +- **Positive**: Agents and future MCP adapters gain a safer path to consume the CLI without scraping human output. +- **Negative**: Every new machine-facing surface must preserve compatibility with existing operator workflows and documentation. +- **Neutral**: A dedicated MCP layer remains optional and should be introduced only if the structured CLI surface proves insufficient. diff --git a/docs/getting-started.md b/docs/getting-started.md index 13366094..3e97546c 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,5 +1,7 @@ # Getting Started with the Obol Stack +> This is an operator guide. The canonical system contract lives in [SPEC.md](../SPEC.md), [ARCHITECTURE.md](../ARCHITECTURE.md), and [BEHAVIORS_AND_EXPECTATIONS.md](../BEHAVIORS_AND_EXPECTATIONS.md). + This guide walks you through installing the Obol Stack, starting a local Kubernetes cluster, testing LLM inference through the AI agent, and optionally monetizing your compute. > [!IMPORTANT] diff --git a/docs/guides/monetize-inference.md b/docs/guides/monetize-inference.md index eb14e8fd..5eb44f4f 100644 --- a/docs/guides/monetize-inference.md +++ b/docs/guides/monetize-inference.md @@ -1,5 +1,7 @@ # How to Monetize Your Inference with Obol Stack +> This is an operator workflow guide. 
The canonical contract for sell-side and buy-side behavior lives in [SPEC.md](../../SPEC.md), [ARCHITECTURE.md](../../ARCHITECTURE.md), and [BEHAVIORS_AND_EXPECTATIONS.md](../../BEHAVIORS_AND_EXPECTATIONS.md). + This guide walks you through exposing a local LLM as a paid API endpoint using the Obol Stack. By the end, you'll have: - A local Ollama model serving inference @@ -10,9 +12,9 @@ This guide walks you through exposing a local LLM as a paid API endpoint using t > [!NOTE] > `--per-mtok` is supported for inference pricing, but phase 1 still charges an > approximate flat request price derived as `perMTok / 1000` using a fixed -> `1000 tok/request` assumption. Exact token metering is deferred to the -> follow-up `x402-meter` design described in -> [`docs/plans/per-token-metering.md`](../plans/per-token-metering.md). +> `1000 tok/request` assumption. Exact token metering is tracked as phase 2 +> follow-up work in [SPEC.md](../../SPEC.md#10-phased-rollout) and +> [ADR-0009](../adr/0009-phase-2-exact-metering-after-pre-request-gate.md). > [!IMPORTANT] > The monetize subsystem is alpha software on the `feat/secure-enclave-inference` branch. diff --git a/docs/guides/monetize_sell_side_testing_log.md b/docs/guides/monetize_sell_side_testing_log.md deleted file mode 100644 index befd67e0..00000000 --- a/docs/guides/monetize_sell_side_testing_log.md +++ /dev/null @@ -1,399 +0,0 @@ -# Monetize Sell-Side Testing Log - -Full lifecycle walkthrough of the hardened monetize subsystem on a fresh dev cluster, using the real x402-rs facilitator against an Anvil fork of base-sepolia. 
- -**Branch**: `fix/review-hardening` (off `feat/secure-enclave-inference`) -**Date**: 2026-02-27 -**Cluster**: `obol-stack-sweeping-man` (k3d, 1 server node) - ---- - -## Prerequisites - -```bash -# Working directory: the obol-stack repo (or worktree) -cd /path/to/obol-stack - -# Environment — set these in every terminal session -export OBOL_DEVELOPMENT=true -export OBOL_CONFIG_DIR=$(pwd)/.workspace/config -export OBOL_BIN_DIR=$(pwd)/.workspace/bin -export OBOL_DATA_DIR=$(pwd)/.workspace/data - -# Alias for brevity (optional) -alias obol="$OBOL_BIN_DIR/obol" -``` - -**External dependencies** (must be installed separately): - -| Dependency | Install | Purpose | -|-----------|---------|---------| -| Docker | [docker.com](https://docker.com) | k3d runs inside Docker | -| Foundry (`anvil`, `cast`) | `curl -L https://foundry.paradigm.xyz \| bash && foundryup` | Local base-sepolia fork | -| Rust toolchain | [rustup.rs](https://rustup.rs) | Building x402-rs facilitator | -| Python 3 + venv | System package manager | Signing the EIP-712 payment header | -| x402-rs | `git clone https://github.com/x402-rs/x402-rs ~/Development/R&D/x402-rs` | Real x402 facilitator | -| Ollama | [ollama.com](https://ollama.com) | Local LLM inference (must be running on host) | -| `/etc/hosts` entry | `echo "127.0.0.1 obol.stack" \| sudo tee -a /etc/hosts` | `obolup.sh` does this, or add manually | - ---- - -## Phase 1: Build & Cluster - -```bash -# 1. Build the obol binary from the hardened branch -go build -o .workspace/bin/obol ./cmd/obol - -# 2. Wipe any previous cluster -obol stack down 2>/dev/null; obol stack purge -f 2>/dev/null -rm -rf "$OBOL_CONFIG_DIR" "$OBOL_DATA_DIR" - -# 3. Initialize fresh cluster config -obol stack init - -# 4. Bring up the cluster -# (builds x402-verifier Docker image locally, deploys all infrastructure) -obol stack up - -# 5. 
Verify — all pods should be Running -obol kubectl get pods -A -``` - -Expected: ~18 pods across namespaces (`erpc`, `kube-system`, `llm`, `monitoring`, `obol-frontend`, `openclaw-default`, `reloader`, `traefik`, `x402`). x402-verifier should have **2 replicas**. - ---- - -## Phase 2: Verify Hardening - -```bash -# Split RBAC ClusterRoles exist -obol kubectl get clusterrole openclaw-monetize-read -obol kubectl get clusterrole openclaw-monetize-workload - -# x402 namespace Role exists -obol kubectl get role openclaw-x402-pricing -n x402 - -# x402 HA: 2 replicas -obol kubectl get deploy x402-verifier -n x402 -o jsonpath='{.spec.replicas}' -# → 2 - -# PDB active -obol kubectl get pdb -n x402 -# → x402-verifier minAvailable=1 allowedDisruptions=1 -``` - ---- - -## Phase 3: Deploy Agent - -```bash -# 6. Deploy the obol-agent singleton -# - creates namespace openclaw-obol-agent -# - deploys openclaw + remote-signer pods -# - injects 24 skills (including monetize) -# - patches all 3 RBAC bindings to the agent's ServiceAccount -obol agent init - -# 7. Verify RBAC bindings point to the agent's ServiceAccount -obol kubectl get clusterrolebinding openclaw-monetize-read-binding \ - -o jsonpath='{.subjects}' -obol kubectl get clusterrolebinding openclaw-monetize-workload-binding \ - -o jsonpath='{.subjects}' -obol kubectl get rolebinding openclaw-x402-pricing-binding -n x402 \ - -o jsonpath='{.subjects}' -# All three should show: -# [{"kind":"ServiceAccount","name":"openclaw","namespace":"openclaw-obol-agent"}] -``` - ---- - -## Phase 4: Configure Payment & Create Offer - -```bash -# 8. Configure x402 pricing (seller wallet + chain) -obol sell pricing \ - --wallet 0x70997970C51812dc3A010C7d01b50e0d17dc79C8 \ - --chain base-sepolia - -# 9. 
Verify Ollama has the model available on the host -curl -s http://localhost:11434/api/tags | python3 -c \ - "import sys,json; [print(m['name']) for m in json.load(sys.stdin)['models']]" -# Should include qwen3:0.6b — if not: -# ollama pull qwen3:0.6b - -# 10. Create ServiceOffer CR -obol sell http my-qwen \ - --type inference \ - --model qwen3:0.6b \ - --runtime ollama \ - --per-request 0.001 \ - --network base-sepolia \ - --pay-to 0x70997970C51812dc3A010C7d01b50e0d17dc79C8 \ - --namespace llm \ - --upstream ollama \ - --port 11434 \ - --path /services/my-qwen -# → serviceoffer.obol.org/my-qwen created -``` - ---- - -## Phase 5: Agent Reconciliation - -```bash -# 11. Trigger reconciliation from inside the agent pod -# (The heartbeat cron runs every 30 min by default — -# this is the same script it would execute) -obol kubectl exec -n openclaw-obol-agent deploy/openclaw -c openclaw -- \ - python3 /data/.openclaw/skills/monetize/scripts/monetize.py process --all - -# Expected output: -# Processing 1 pending offer(s)... -# Reconciling llm/my-qwen... -# Checking if model qwen3:0.6b is available... -# Model qwen3:0.6b already available -# Health-checking http://ollama.llm.svc.cluster.local:11434/health... -# Upstream reachable (HTTP 404 — acceptable for health check) -# Creating Middleware x402-my-qwen... -# Added pricing route: /services/my-qwen/* → 0.001 USDC -# Creating HTTPRoute so-my-qwen... -# ServiceOffer llm/my-qwen is Ready - -# 12. Verify all 6 conditions are True -obol sell status my-qwen --namespace llm -# → ModelReady=True -# UpstreamHealthy=True -# PaymentGateReady=True -# RoutePublished=True -# Registered=True (Skipped) -# Ready=True -``` - ---- - -## Phase 6: Test 402 Gate (No Payment) - -```bash -# 13. 
Request without payment → expect HTTP 402 -curl -s -w "\nHTTP %{http_code}" -X POST \ - "http://obol.stack:8080/services/my-qwen/v1/chat/completions" \ - -H "Content-Type: application/json" \ - -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}],"stream":false}' - -# Expected: HTTP 402 + JSON body: -# { -# "x402Version": 1, -# "error": "Payment required for this resource", -# "accepts": [{ -# "scheme": "exact", -# "network": "base-sepolia", -# "maxAmountRequired": "1000", -# "asset": "0x036CbD53842c5426634e7929541eC2318f3dCF7e", -# "payTo": "0x70997970C51812dc3A010C7d01b50e0d17dc79C8", -# ... -# }] -# } -``` - ---- - -## Phase 7: Start x402-rs Facilitator + Anvil - -```bash -# 14. Start Anvil forking base-sepolia (background, port 8545) -anvil --fork-url https://sepolia.base.org --port 8545 --host 0.0.0.0 --silent & - -# Verify Anvil is running: -curl -s -X POST http://localhost:8545 \ - -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' -# → {"jsonrpc":"2.0","id":1,"result":"0x14a34"} (84532 = base-sepolia) - -# 15. Build x402-rs facilitator (first time only, ~2 min) -cd ~/Development/R\&D/x402-rs/facilitator && cargo build --release && cd - - -# 16. Start facilitator with Anvil config (background, port 4040) -# config-anvil.json points RPC at host.docker.internal:8545 -~/Development/R\&D/x402-rs/facilitator/target/release/facilitator \ - --config ~/Development/R\&D/x402-rs/config-anvil.json & - -# Verify facilitator is running: -curl -s http://localhost:4040/supported -# → {"kinds":[{"x402Version":1,"scheme":"exact","network":"base-sepolia"}, ...], -# "signers":{"eip155:84532":["0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266"]}} - -# 17. Verify buyer (Anvil account 0) has USDC on the fork -cast call 0x036CbD53842c5426634e7929541eC2318f3dCF7e \ - "balanceOf(address)(uint256)" \ - 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266 \ - --rpc-url http://localhost:8545 -# → non-zero balance (e.g. 
287787514 = ~287 USDC) -``` - ---- - -## Phase 8: Patch Verifier → Local Facilitator - -```bash -# 18. Point x402-verifier at the local x402-rs facilitator -# macOS: host.docker.internal -# Linux: host.k3d.internal -obol kubectl patch configmap x402-pricing -n x402 --type merge -p '{ - "data": { - "pricing.yaml": "wallet: 0x70997970C51812dc3A010C7d01b50e0d17dc79C8\nchain: base-sepolia\nfacilitatorURL: http://host.docker.internal:4040\nverifyOnly: false\nroutes:\n- pattern: \"/services/my-qwen/*\"\n price: \"0.001\"\n description: \"ServiceOffer my-qwen\"\n payTo: \"0x70997970C51812dc3A010C7d01b50e0d17dc79C8\"\n network: \"base-sepolia\"\n" - } -}' - -# 19. Restart verifier to pick up immediately -# (otherwise the file watcher takes 60-120s) -obol kubectl rollout restart deploy/x402-verifier -n x402 -obol kubectl rollout status deploy/x402-verifier -n x402 --timeout=60s -``` - ---- - -## Phase 9: Sign Payment & Test Paid Request - -```bash -# 20. Create venv and install eth-account -python3 -m venv /tmp/x402-venv -/tmp/x402-venv/bin/pip install eth-account --quiet - -# 21. 
Write the payment signing script -cat > /tmp/x402-pay.py << 'PYEOF' -#!/usr/bin/env python3 -"""Sign an x402 V1 exact payment header using Anvil account 0.""" -import json, base64, os -from eth_account import Account -from eth_account.messages import encode_typed_data - -PRIVATE_KEY = "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" -PAYER = "0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266" -PAY_TO = "0x70997970C51812dc3A010C7d01b50e0d17dc79C8" -USDC = "0x036CbD53842c5426634e7929541eC2318f3dCF7e" -CHAIN_ID = 84532 -AMOUNT = "1000" # 0.001 USDC in 6-decimal micro-units -NONCE = "0x" + os.urandom(32).hex() - -signable = encode_typed_data(full_message={ - "types": { - "EIP712Domain": [ - {"name": "name", "type": "string"}, - {"name": "version", "type": "string"}, - {"name": "chainId", "type": "uint256"}, - {"name": "verifyingContract", "type": "address"}, - ], - "TransferWithAuthorization": [ - {"name": "from", "type": "address"}, - {"name": "to", "type": "address"}, - {"name": "value", "type": "uint256"}, - {"name": "validAfter", "type": "uint256"}, - {"name": "validBefore", "type": "uint256"}, - {"name": "nonce", "type": "bytes32"}, - ], - }, - "primaryType": "TransferWithAuthorization", - "domain": { - "name": "USDC", "version": "2", - "chainId": CHAIN_ID, "verifyingContract": USDC, - }, - "message": { - "from": PAYER, "to": PAY_TO, - "value": int(AMOUNT), - "validAfter": 0, "validBefore": 4294967295, - "nonce": bytes.fromhex(NONCE[2:]), - }, -}) - -signed = Account.sign_message(signable, PRIVATE_KEY) - -# IMPORTANT: x402-rs wire format requires validAfter/validBefore as STRINGS -payload = { - "x402Version": 1, - "scheme": "exact", - "network": "base-sepolia", - "payload": { - "signature": "0x" + signed.signature.hex(), - "authorization": { - "from": PAYER, "to": PAY_TO, - "value": AMOUNT, # string (decimal_u256) - "validAfter": "0", # string (UnixTimestamp) - "validBefore": "4294967295", # string (UnixTimestamp) - "nonce": NONCE, # string (B256 hex) 
- }, - }, - "resource": { - "payTo": PAY_TO, "maxAmountRequired": AMOUNT, - "asset": USDC, "network": "base-sepolia", - }, -} -print(base64.b64encode(json.dumps(payload).encode()).decode()) -PYEOF - -# 22. Generate payment header and send paid request -PAYMENT=$(/tmp/x402-venv/bin/python3 /tmp/x402-pay.py) - -curl -s -w "\nHTTP %{http_code}" -X POST \ - "http://obol.stack:8080/services/my-qwen/v1/chat/completions" \ - -H "Content-Type: application/json" \ - -H "X-PAYMENT: $PAYMENT" \ - -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Say hello in exactly 3 words"}],"stream":false}' - -# Expected: HTTP 200 + full Ollama inference response JSON -``` - ---- - -## Phase 10: Lifecycle Cleanup - -```bash -# 23. Stop offer (removes pricing route from ConfigMap, keeps CR) -obol sell stop my-qwen --namespace llm - -# 24. Restart verifier so removed route takes effect immediately -obol kubectl rollout restart deploy/x402-verifier -n x402 - -# 25. Verify endpoint is now free (no payment required) -curl -s -w "\nHTTP %{http_code}" -X POST \ - "http://obol.stack:8080/services/my-qwen/v1/chat/completions" \ - -H "Content-Type: application/json" \ - -d '{"model":"qwen3:0.6b","messages":[{"role":"user","content":"Hello"}],"stream":false}' -# → HTTP 200 (free endpoint, no 402) - -# 26. Full delete — removes CR + Middleware + HTTPRoute (ownerRef cascade) -obol sell delete my-qwen --namespace llm --force - -# 27. Verify everything is cleaned up -obol kubectl get serviceoffers,middleware,httproutes -n llm -# → No resources found in llm namespace. - -# 28. 
Stop background processes and clean up temp files -pkill -f "anvil.*fork-url" -pkill -f "facilitator.*config-anvil" -rm -rf /tmp/x402-venv /tmp/x402-pay.py -``` - ---- - -## Reference: Key Addresses - -| Role | Address | Note | -|------|---------|------| -| Seller (payTo) | `0x70997970C51812dc3A010C7d01b50e0d17dc79C8` | Anvil account 1 | -| Buyer (payer) | `0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266` | Anvil account 0 | -| Buyer private key | `0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80` | Anvil default — never use in production | -| USDC (base-sepolia) | `0x036CbD53842c5426634e7929541eC2318f3dCF7e` | Circle USDC on base-sepolia | -| Chain ID | `84532` | base-sepolia | - -## Reference: Key Gotchas - -| Gotcha | Detail | -|--------|--------| -| **macOS vs Linux host bridging** | macOS: `host.docker.internal`. Linux: `host.k3d.internal` (step 18) | -| **x402-rs timestamp format** | `validAfter`/`validBefore` must be **strings** (`"0"`, `"4294967295"`), not integers. x402-rs `UnixTimestamp` deserializes from stringified u64 | -| **ConfigMap propagation delay** | x402-verifier file watcher takes 60-120s. Use `kubectl rollout restart` for immediate effect | -| **Heartbeat interval** | 30 minutes by default. For interactive testing, exec into the pod and run `monetize.py process --all` manually (step 11) | -| **`/etc/hosts`** | Must have `127.0.0.1 obol.stack`. `obolup.sh` sets this during install, or add manually | -| **`OBOL_DEVELOPMENT=true`** | Required for `obol stack up` to build the x402-verifier Docker image locally instead of pulling from registry | -| **Anvil fork freshness** | Each `anvil` restart creates a fresh fork. USDC balances come from the forked base-sepolia state at the time of fork | -| **x402-rs `config-anvil.json`** | Ships with the x402-rs repo. Points `eip155:84532` RPC at `host.docker.internal:8545` (Anvil). 
Adjust if your Anvil is on a different port | diff --git a/docs/guides/monetize_test_coverage_report.md b/docs/guides/monetize_test_coverage_report.md deleted file mode 100644 index d4c0262b..00000000 --- a/docs/guides/monetize_test_coverage_report.md +++ /dev/null @@ -1,666 +0,0 @@ -# Monetize Subsystem — Test Coverage Report - -**Branch**: `fix/review-hardening` (off `feat/secure-enclave-inference`) -**Date**: 2026-02-27 -**Total integration tests**: 46 across 3 files - ---- - -## Section Overview - -``` -┌──────────────────────────────────────────────────────────────────┐ -│ TEST PYRAMID │ -│ │ -│ ▲ │ -│ ╱ ╲ Phase 8: FULL (1) │ -│ ╱ ╲ ← tunnel+Ollama+x402-rs+EIP-712 │ -│ ╱─────╲ │ -│ ╱ ╲ Phase 5+: Real Facilitator (1) │ -│ ╱ ╲ ← real x402-rs, real EIP-712 │ -│ ╱───────────╲ │ -│ ╱ ╲ Phase 6+7: Tunnel + Fork (5) │ -│ ╱ ╲ ← real Ollama, mock facilitator │ -│ ╱─────────────────╲ │ -│ ╱ ╲ Phase 4+5: Payment + E2E (8) │ -│ ╱ ╲ ← mock facilitator, real gate │ -│ ╱─────────────────╲ │ -│ ╱ ╲ Phase 3: Routing (6) │ -│ ╱ ╲ ← real Traefik, Anvil RPC │ -│ ╱───────────────────────╲ │ -│ ╱ ╲ Phase 2: RBAC + Recon (6) │ -│ ╱ ╲ ← real agent in pod │ -│ ╱─────────────────────────────╲ │ -│ ╱ ╲ Phase 1: CRD (7) │ -│ ╱ ╲ ← schema validation │ -│ ╱───────────────────────────────────╲ │ -│ ╱ ╲ Base: Inference (12)│ -│ ╱_______________________________________╲ ← Ollama + skills │ -│ │ -└──────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Phase 1 — CRD Lifecycle (7 tests) - -**What it covers**: ServiceOffer custom resource schema validation, CRUD operations, printer columns, status subresource isolation. - -**Realism**: Low (data-plane only, no reconciliation or traffic). 
- -``` -┌─────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ kubectl apply ──▶ ┌──────────────────┐ │ -│ │ ServiceOffer CR │ │ -│ kubectl get ──▶ │ (obol.org CRD) │ │ -│ └──────────────────┘ │ -│ kubectl patch ──▶ │ │ -│ kubectl delete──▶ ▼ │ -│ API Server validates: │ -│ ✓ wallet regex (^0x[0-9a-fA-F]{40}$)│ -│ ✓ status subresource isolation │ -│ ✓ printer columns (TYPE, PRICE) │ -│ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ NOT TESTED: reconciler, routing, payment │ │ -│ └─────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `CRD_Exists` | CRD installed in cluster | -| `CRD_CreateGet` | Spec fields round-trip correctly | -| `CRD_List` | kubectl list works | -| `CRD_StatusSubresource` | Status patch doesn't mutate spec | -| `CRD_WalletValidation` | Invalid wallet rejected by API server | -| `CRD_PrinterColumns` | `kubectl get` shows TYPE, PRICE, NETWORK | -| `CRD_Delete` | CR deletion works | - -**Gap vs real world**: No agent involvement. A real user runs `obol sell http`, not raw kubectl. - ---- - -## Phase 2 — RBAC + Reconciliation (6 tests) - -**What it covers**: Split RBAC roles exist and are bound, agent can read/write CRs from inside pod, reconciler handles unhealthy upstreams, idempotent re-processing. - -**Realism**: Medium (real agent pod, real RBAC, but no traffic or payment). 
- -``` -┌─────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌─────────────┐ RBAC Check ┌─────────────────────────┐ │ -│ │ Test Runner │ ────────────────▶ │ ClusterRole: │ │ -│ │ (kubectl get)│ │ openclaw-monetize-read │ │ -│ └─────────────┘ │ openclaw-monetize-wkld │ │ -│ │ │ Role: │ │ -│ │ │ openclaw-x402-pricing │ │ -│ │ └─────────────────────────┘ │ -│ │ │ -│ │ kubectl exec │ -│ ▼ │ -│ ┌─────────────────────────────────┐ │ -│ │ obol-agent pod │ │ -│ │ monetize.py process │──▶ ServiceOffer CR │ -│ │ monetize.py process --all │ (status conditions) │ -│ │ monetize.py list │ │ -│ └─────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ UpstreamHealthy=False (no real upstream) │ -│ HEARTBEAT_OK (no pending offers) │ -│ │ -│ ┌──────────────────────────────────────────────────────────┐ │ -│ │ NOT TESTED: Traefik routing, x402 gate, payment, tunnel │ │ -│ └──────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `RBAC_ClusterRolesExist` | Split RBAC roles deployed by k3s manifests | -| `RBAC_BindingsPatched` | `obol agent init` patches all 3 bindings | -| `Monetize_ListEmpty` | Agent skill lists zero offers | -| `Monetize_ProcessAllEmpty` | Heartbeat returns OK with no work | -| `Monetize_ProcessUnhealthy` | Sets UpstreamHealthy=False for missing svc | -| `Monetize_Idempotent` | Second reconcile doesn't error | - -**Gap vs real world**: No upstream service exists. Reconciliation never reaches PaymentGateReady or RoutePublished. - ---- - -## Phase 3 — Routing with Anvil Upstream (6 tests) - -**What it covers**: Full 6-condition reconciliation with a real upstream (Anvil fork), Traefik Middleware + HTTPRoute creation, traffic forwarding, owner-reference cascade on delete. - -**Realism**: Medium-High (real cluster networking, real Traefik, real upstream). No payment gate yet. 
- -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌──────────┐ │ -│ │ Anvil │ ◀── Host machine (port N) │ -│ │ (fork of │ forking Base Sepolia │ -│ │ base-sep)│ │ -│ └────┬─────┘ │ -│ │ ClusterIP + EndpointSlice │ -│ │ (anvil-rpc.test-ns.svc) │ -│ ▼ │ -│ ┌──────────────────────────────────────────────────────────┐ │ -│ │ k3d cluster │ │ -│ │ │ │ -│ │ Agent reconciles: │ │ -│ │ ✓ UpstreamHealthy (HTTP health-check to Anvil) │ │ -│ │ ✓ PaymentGateReady (Middleware created) │ │ -│ │ ✓ RoutePublished (HTTPRoute created) │ │ -│ │ ✓ Ready │ │ -│ │ │ │ -│ │ ┌─────────────┐ ┌──────────────┐ ┌──────────┐ │ │ -│ │ │ Traefik GW │────▶│ HTTPRoute │────▶│ Anvil │ │ │ -│ │ │ :8080 │ │ /services/x │ │ upstream │ │ │ -│ │ └─────────────┘ └──────────────┘ └──────────┘ │ │ -│ │ │ │ -│ │ curl POST obol.stack:8080/services/x │ │ -│ │ → eth_blockNumber response from Anvil ✓ │ │ -│ └──────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌────────────────────────────────────────────────────────────┐ │ -│ │ NOT TESTED: x402 ForwardAuth (no facilitator), no 402 │ │ -│ └────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `Route_AnvilUpstream` | Anvil responds locally | -| `Route_FullReconcile` | All 4 conditions reach True | -| `Route_MiddlewareCreated` | ForwardAuth Middleware exists | -| `Route_HTTPRouteCreated` | HTTPRoute has correct parentRef | -| `Route_TrafficRoutes` | HTTP through Traefik reaches Anvil | -| `Route_DeleteCascades` | ownerRef GC cleans up derived resources | - -**Gap vs real world**: No payment gate. Requests go straight through without x402 gating. Free endpoint, not monetized. 
- ---- - -## Phase 4 — Payment Gate (4 tests) - -**What it covers**: x402-verifier health, 402 response without payment, 402 response body format (x402 spec compliance), 200 response with mock payment. - -**Realism**: Medium-High. Real x402-verifier, real Traefik ForwardAuth. Mock facilitator always says `isValid: true`. - -``` -┌──────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌───────┐ POST /services/x ┌──────────┐ ForwardAuth │ -│ │Client │ ─────────────────────▶ │ Traefik │ ──────────────▶ │ -│ │(test) │ │ Gateway │ │ │ -│ └───────┘ └──────────┘ │ │ -│ │ │ ▼ │ -│ │ │ ┌──────────────┐│ -│ │ No X-PAYMENT header │ │ x402-verifier││ -│ │ ──────────────────▶ │ │ (real pod) ││ -│ │ │ │ ││ -│ │ ◀── 402 + pricing JSON │ │ Checks: ││ -│ │ │ │ ✓ route match││ -│ │ │ │ ✓ has header ││ -│ │ X-PAYMENT: │ │ ✓ call facil.││ -│ │ ──────────────────▶ │ │ ││ -│ │ │ │ ┌────────┐ ││ -│ │ │ │ │ Mock │ ││ -│ │ ◀── 200 + Anvil response │ │ │ Facil. │ ││ -│ │ │ │ │ always │ ││ -│ │ │ │ │ valid │ ││ -│ │ │ │ └────────┘ ││ -│ │ │ └──────────────┘│ -│ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ MOCK: facilitator (no real signature validation) │ │ -│ │ MOCK: payment header (fake JSON, not real EIP-712) │ │ -│ └──────────────────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `PaymentGate_VerifierHealthy` | /healthz and /readyz return 200 | -| `PaymentGate_402WithoutPayment` | No payment → 402 | -| `PaymentGate_RequirementsFormat` | 402 body matches x402 spec | -| `PaymentGate_200WithPayment` | Mock payment → 200 | - -**Gap vs real world**: The facilitator never validates the EIP-712 signature. Any well-formed JSON base64 header passes. Wire format bugs (string vs int types) are invisible. 
- ---- - -## Phase 5 — Full E2E CLI-Driven (3 tests) - -**What it covers**: `obol sell http` CLI → CR creation → agent reconciliation → 402 → 200 → `obol sell list/status/delete`. Heartbeat auto-reconciliation (90s wait). - -**Realism**: High for the CLI path. Still uses mock facilitator for payment. - -``` -┌──────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌────────────────┐ │ -│ │ obol sell│ │ -│ │ offer my-qwen │ ──▶ ServiceOffer CR │ -│ │ --type inference │ │ -│ │ --model qwen3 │ │ -│ │ --per-request .. │ │ -│ └────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌──────────────────────────────────────────────────────────────────┐ │ -│ │ Agent pod (autonomous reconciliation) │ │ -│ │ │ │ -│ │ monetize.py process ──▶ 6 conditions ──▶ Ready=True │ │ -│ │ │ │ -│ │ OR: heartbeat cron (every 30min) auto-reconciles │ │ -│ └──────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌──────────────────────────────────────────────────────────────────┐ │ -│ │ obol sell list → shows offer │ │ -│ │ obol sell status → shows all conditions │ │ -│ │ obol sell delete → cleans up CR + derived resources │ │ -│ └──────────────────────────────────────────────────────────────────┘ │ -│ │ -│ Still uses mock facilitator for payment verification. │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `E2E_OfferLifecycle` | Full CLI → create → reconcile → pay → delete | -| `E2E_HeartbeatReconciles` | Cron-driven reconciliation without manual trigger | -| `E2E_ListAndStatus` | CLI query commands work | - -**Gap vs real world**: Mock facilitator. No real model (Anvil upstream, not Ollama). - ---- - -## Phase 6 — Tunnel E2E + Ollama (2 tests) - -**What it covers**: Real Ollama inference through the full stack, including Cloudflare tunnel accessibility. Agent-autonomous offer management. - -**Realism**: Very High for the local path. 
Tunnel tests require CF credentials. - -``` -┌───────────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌─────────┐ POST /services/x/v1/chat/completions │ -│ │ Client │ ────────────────────────────────────────▶ │ -│ └─────────┘ │ │ -│ │ ▼ │ -│ │ ┌──────────┐ ForwardAuth ┌──────────────────┐ │ -│ │ │ Traefik │ ──────────────▶ │ x402-verifier │ │ -│ │ │ Gateway │ │ → mock facilitator│ │ -│ │ └──────────┘ └──────────────────┘ │ -│ │ │ │ -│ │ │ payment valid │ -│ │ ▼ │ -│ │ ┌──────────┐ │ -│ │ │ Ollama │ ← REAL model (qwen3:0.6b) │ -│ │ │ (llm ns) │ REAL inference response │ -│ │ └──────────┘ │ -│ │ │ -│ │ Also tests via tunnel: │ -│ │ ┌─────────────────────┐ │ -│ │ │ Cloudflare Tunnel │ ← if CF credentials configured │ -│ │ │ https:// │ │ -│ │ └─────────────────────┘ │ -│ │ │ -│ ┌────────────────────────────────────────────────────────────────┐ │ -│ │ REAL: Ollama inference, Traefik routing, x402-verifier │ │ -│ │ MOCK: facilitator (still always-valid) │ │ -│ │ OPTIONAL: CF tunnel (skipped without credentials) │ │ -│ └────────────────────────────────────────────────────────────────┘ │ -└───────────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `Tunnel_OllamaMonetized` | Real model → real inference → mock payment → response | -| `Tunnel_AgentAutonomousMonetize` | Agent creates/manages offer without CLI | - -**Gap vs real world**: Mock facilitator. Real-world buyers send real EIP-712 signatures. - ---- - -## Phase 7 — Fork Validation with Mock Facilitator (2 tests) - -**What it covers**: Anvil-fork-backed upstream with mock facilitator verify/settle tracking, agent error recovery from bad upstream state. - -**Realism**: Medium-High. Real on-chain environment (forked), but fake payment validation. 
- -``` -┌──────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌──────────┐ ┌─────────────────┐ │ -│ │ Anvil │ ◀── fork of Base Sepolia │ Mock Facilitator│ │ -│ │ (real │ real block numbers │ ✓ /verify │ │ -│ │ chain │ real chain ID 84532 │ → always valid│ │ -│ │ state) │ │ ✓ /settle │ │ -│ └──────────┘ │ → always ok │ │ -│ │ │ Tracks call │ │ -│ │ EndpointSlice │ counts only │ │ -│ ▼ └─────────────────┘ │ -│ ┌───────────────────────────────────┐ │ │ -│ │ Full reconciliation pipeline │ │ │ -│ │ ✓ UpstreamHealthy (Anvil health) │ │ │ -│ │ ✓ PaymentGateReady │ │ │ -│ │ ✓ RoutePublished │ │ │ -│ │ ✓ Ready │◀───────────┘ │ -│ │ │ │ -│ │ Also tests: │ │ -│ │ ✓ Pricing route in ConfigMap │ │ -│ │ ✓ Delete cleans up pricing route │ │ -│ │ ✓ Agent self-heals from bad state │ │ -│ └───────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────────────────────┐ │ -│ │ MOCK: facilitator (no signature validation, no USDC check) │ │ -│ │ MOCK: payment header (fake JSON blob) │ │ -│ └──────────────────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `Fork_FullPaymentFlow` | 402 → 200 with mock, verify/settle called | -| `Fork_AgentSkillIteration` | Agent recovers from unreachable upstream | - -**Gap vs real world**: Facilitator never validates signatures. USDC balance irrelevant. - ---- - -## Phase 5+ — Real Facilitator Payment (1 test) ← CLOSEST TO PRODUCTION - -**What it covers**: The entire payment cryptography stack. Real x402-rs facilitator binary, real EIP-712 TransferWithAuthorization signatures, real USDC balance on Anvil fork, real signature validation. - -**Realism**: Very High. The only mock remaining is the chain settlement (Anvil resets after test). 
- -``` -┌──────────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ ┌──────────┐ Buyer: Anvil Account[0] │ -│ │ go test │ 10 USDC minted via anvil_setStorageAt │ -│ │ │ │ -│ │ Signs real EIP-712 │ -│ │ TransferWithAuthorization │ -│ │ (ERC-3009) │ -│ │ │ -│ │ ┌─────────────────────────────────────┐ │ -│ │ │ TypedData: │ │ -│ │ │ domain: USD Coin / v2 / 84532 │ │ -│ │ │ from: buyer address │ │ -│ │ │ to: seller address │ │ -│ │ │ value: "1000" (0.001 USDC) │ │ -│ │ │ validAfter: "0" ← STRING! │ │ -│ │ │ validBefore: "4294967295" ← STRING│ │ -│ │ │ nonce: random 32 bytes │ │ -│ │ └─────────────────────────────────────┘ │ -│ └──────────┘ │ -│ │ │ -│ │ X-PAYMENT: base64(envelope) │ -│ ▼ │ -│ ┌──────────┐ ForwardAuth ┌──────────────────┐ │ -│ │ Traefik │ ───────────────▶ │ x402-verifier │ │ -│ │ Gateway │ │ (real pod) │ │ -│ └──────────┘ └────────┬─────────┘ │ -│ │ │ │ -│ │ │ POST /verify │ -│ │ ▼ │ -│ │ ┌──────────────────┐ │ -│ │ │ x402-rs │ ← REAL binary │ -│ │ │ facilitator │ │ -│ │ │ │ │ -│ │ │ ✓ Decodes header │ │ -│ │ │ ✓ Validates EIP │ │ -│ │ │ 712 signature │ │ -│ │ │ ✓ Checks USDC │ │ -│ │ │ balance on │ │ -│ │ │ Anvil fork │ │ -│ │ │ ✓ Returns │ │ -│ │ │ isValid: true │ │ -│ │ └────────┬─────────┘ │ -│ │ │ │ -│ │ │ connected to: │ -│ │ ▼ │ -│ │ ┌──────────────────┐ │ -│ │ │ Anvil Fork │ ← REAL chain state │ -│ │ │ (Base Sepolia) │ │ -│ │ │ chain ID: 84532 │ │ -│ │ │ │ │ -│ │ │ Has USDC balance │ │ -│ │ │ for buyer address │ │ -│ │ └──────────────────┘ │ -│ │ │ -│ │ 200 OK │ -│ ▼ │ -│ Response from Anvil (eth_blockNumber) │ -│ │ -│ ┌───────────────────────────────────────────────────────────────────┐ │ -│ │ REAL: x402-rs binary, EIP-712 signing, USDC state, verifier, │ │ -│ │ Traefik ForwardAuth, agent reconciliation, CRD lifecycle │ │ -│ │ SIMULATED: chain (Anvil fork, not mainnet), settlement (no │ │ -│ │ actual USDC transfer, Anvil state resets) │ │ -│ 
└───────────────────────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `Fork_RealFacilitatorPayment` | Real EIP-712 → real x402-rs → real validation → 200 | - -**Gap vs real world**: Settlement doesn't transfer real USDC (Anvil fork resets). No real L1/L2 block confirmation. No Cloudflare tunnel in this test. - ---- - -## Phase 8 — Full Stack: Tunnel + Ollama + Real Facilitator (1 test) ← PRODUCTION EQUIVALENT - -**What it covers**: Everything. Real Ollama inference, real x402-rs facilitator, real EIP-712 signatures, USDC-funded Anvil fork, and requests entering through the Cloudflare quick tunnel's dynamic `*.trycloudflare.com` URL. - -**Realism**: Maximum. This is a production sell-side scenario with the only difference being Anvil (not mainnet) and a quick tunnel (not a persistent named tunnel). - -``` -┌──────────────────────────────────────────────────────────────────────────────┐ -│ TEST BOUNDARY │ -│ │ -│ BUYER (test runner) │ -│ ┌──────────────────────────────────────────────────────────────────────┐ │ -│ │ 1. Signs real EIP-712 TransferWithAuthorization (ERC-3009) │ │ -│ │ domain: USD Coin / v2 / 84532 │ │ -│ │ from: 0xf39F... (Anvil account[0], funded with 10 USDC) │ │ -│ │ to: 0x7099... 
(seller) │ │ -│ │ value: "1000" (0.001 USDC) │ │ -│ │ nonce: random 32 bytes │ │ -│ └──────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ │ POST https://.trycloudflare.com/services/test-tunnel-real/ │ -│ │ /v1/chat/completions │ -│ │ X-PAYMENT: base64(real EIP-712 envelope) │ -│ ▼ │ -│ ┌──────────────────────────────────────┐ │ -│ │ Cloudflare Edge (quick tunnel) │ ← REAL Cloudflare infrastructure │ -│ │ *.trycloudflare.com │ dynamic URL, non-persistent │ -│ │ TLS termination │ │ -│ └────────────────┬─────────────────────┘ │ -│ │ cloudflared connector (k3d pod) │ -│ ▼ │ -│ ┌──────────────────────────────────────┐ │ -│ │ Traefik Gateway (:443 internal) │ ← REAL Traefik, Gateway API │ -│ │ HTTPRoute: /services/test-tunnel-* │ │ -│ │ ForwardAuth middleware │ │ -│ └────────────────┬─────────────────────┘ │ -│ │ ForwardAuth request │ -│ ▼ │ -│ ┌──────────────────────────────────────┐ │ -│ │ x402-verifier (2 replicas, PDB) │ ← REAL verifier pod │ -│ │ Extracts X-PAYMENT header │ │ -│ │ Looks up pricing route in ConfigMap │ │ -│ │ Calls facilitator /verify │ │ -│ └────────────────┬─────────────────────┘ │ -│ │ POST /verify │ -│ ▼ │ -│ ┌──────────────────────────────────────┐ │ -│ │ x402-rs facilitator (host process) │ ← REAL Rust binary │ -│ │ │ │ -│ │ ✓ Decodes x402 V1 envelope │ │ -│ │ ✓ Recovers signer from EIP-712 sig │ │ -│ │ ✓ Checks USDC balance on Anvil │ │ -│ │ ✓ Validates nonce not replayed │ │ -│ │ ✓ Returns isValid: true + payer │ │ -│ └────────────────┬─────────────────────┘ │ -│ │ connected to: │ -│ ▼ │ -│ ┌──────────────────────────────────────┐ │ -│ │ Anvil Fork (host process) │ ← REAL chain state (Base Sepolia) │ -│ │ chain ID: 84532 │ USDC balances, nonce tracking │ -│ │ 10 USDC minted to buyer │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ ◀── verifier returns 200 (payment valid) │ -│ │ │ -│ ▼ Traefik forwards to upstream │ -│ ┌──────────────────────────────────────┐ │ -│ │ Ollama (llm namespace) │ ← REAL model 
inference │ -│ │ model: qwen2.5 / qwen3:0.6b │ actual LLM generation │ -│ │ │ │ -│ │ POST /v1/chat/completions │ │ -│ │ → "say hello in one word" │ │ -│ │ ← {"choices":[{"message":...}]} │ │ -│ └──────────────────────────────────────┘ │ -│ │ -│ ◀── 200 + inference response returned to buyer via tunnel │ -│ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ REAL: tunnel, Traefik, x402-verifier, x402-rs, EIP-712, USDC, │ │ -│ │ Ollama, agent reconciliation, CRD, RBAC, Gateway API │ │ -│ │ SIMULATED: chain (Anvil fork, not mainnet), settlement │ │ -│ │ NOT PERSISTENT: quick tunnel URL changes on restart │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────────────────┘ -``` - -| Test | What It Proves | -|------|----------------| -| `Tunnel_RealFacilitatorOllama` | Buyer → CF tunnel → x402 gate → real EIP-712 validation → real Ollama inference → response via tunnel | - -**What makes this different from every other test**: - -| Component | Phase 6 (existing) | Phase 5+ (Anvil) | Phase 8 (this) | -|-----------|-------------------|-------------------|----------------| -| Inference | Real Ollama | Anvil RPC | Real Ollama | -| Facilitator | Mock (always valid) | Real x402-rs | Real x402-rs | -| Payment signature | Fake JSON blob | Real EIP-712 | Real EIP-712 | -| USDC balance | N/A | Minted on Anvil | Minted on Anvil | -| Entry point | obol.stack:8080 | obol.stack:8080 | **\*.trycloudflare.com** | -| TLS | None (HTTP) | None (HTTP) | **Real TLS** (CF edge) | - -**Gap vs real world**: Quick tunnel URL is ephemeral (not a persistent `myagent.example.com`). USDC settlement doesn't transfer real tokens (Anvil resets). No real L1/L2 block finality. 
- ---- - -## Base Tests — Inference + Skills (12 tests) - -**What they cover**: Ollama/Anthropic/OpenAI/Google/Zhipu inference through LiteLLM, skill staging and injection, skill visibility in pod, skill-driven agent responses. - -**Realism**: Very High for inference path. These are the "does the AI actually work" tests. - -Not directly part of the monetize subsystem, but they validate the upstream service that gets monetized. - ---- - -## Realism Comparison Matrix - -``` - CRD RBAC Agent Traefik x402 Facil. EIP-712 USDC Ollama Tunnel TLS - ─── ──── ───── ─────── ──── ────── ─────── ──── ────── ────── ─── -Phase 1 (CRD) ✓ -Phase 2 (RBAC) ✓ ✓ ✓ -Phase 3 (Route) ✓ ✓ ✓ ✓ -Phase 4 (Gate) ✓ ✓ ✓ ✓ ✓ MOCK MOCK -Phase 5 (E2E) ✓ ✓ ✓ ✓ ✓ MOCK MOCK -Phase 6 (Tunnel) ✓ ✓ ✓ ✓ ✓ MOCK MOCK ✓ ✓ ✓ -Phase 7 (Fork) ✓ ✓ ✓ ✓ ✓ MOCK MOCK N/A -Phase 5+ (Real) ✓ ✓ ✓ ✓ ✓ REAL REAL REAL -Phase 8 (FULL) ✓ ✓ ✓ ✓ ✓ REAL REAL REAL ✓ ✓ ✓ - - ✓ = real component MOCK = simulated REAL = production-equivalent -``` - ---- - -## What's Still Not Tested - -| Gap | Impact | Mitigation | -|-----|--------|------------| -| **Real USDC settlement** | Anvil fork doesn't persist transfers | Would need Base Sepolia testnet with real USDC faucet | -| **Persistent named tunnel** | Quick tunnel URL is ephemeral | Phase 8 uses quick tunnel; persistent requires `obol tunnel provision` with CF credentials | -| **Concurrent buyers** | All tests are single-buyer | Add load test with multiple signed payments | -| **ERC-8004 registration** | `obol sell register` not tested end-to-end | Would need real Base Sepolia tx (gas costs) | -| **Price change hot-reload** | Agent updates price in CR → verifier picks up new amount | Test exists partially in Phase 4 format checks | -| **Buy-side flow** | No buyer CLI/SDK test | Planned as next phase | - ---- - -## Running the Tests - -```bash -# Prerequisites -export OBOL_DEVELOPMENT=true -export OBOL_CONFIG_DIR=$(pwd)/../../.workspace/config -export 
OBOL_BIN_DIR=$(pwd)/../../.workspace/bin -export OBOL_DATA_DIR=$(pwd)/../../.workspace/data - -# Phase 1-3: CRD + RBAC + Routing (fast, ~2min) -go test -tags integration -v -timeout 5m \ - -run 'TestIntegration_CRD_|TestIntegration_RBAC_|TestIntegration_Monetize_|TestIntegration_Route_' \ - ./internal/openclaw/ - -# Phase 4-5: Payment gate + E2E (medium, ~5min) -go test -tags integration -v -timeout 10m \ - -run 'TestIntegration_PaymentGate_|TestIntegration_E2E_' \ - ./internal/openclaw/ - -# Phase 6: Tunnel + Ollama (slow, ~8min, needs Ollama model cached) -go test -tags integration -v -timeout 15m \ - -run 'TestIntegration_Tunnel_' \ - ./internal/openclaw/ - -# Phase 7: Fork validation (medium, ~5min) -go test -tags integration -v -timeout 10m \ - -run 'TestIntegration_Fork_FullPaymentFlow|TestIntegration_Fork_AgentSkillIteration' \ - ./internal/openclaw/ - -# Phase 5+: Real facilitator (medium, ~5min, needs x402-rs) -export X402_RS_DIR=/path/to/x402-rs -go test -tags integration -v -timeout 15m \ - -run 'TestIntegration_Fork_RealFacilitatorPayment' \ - ./internal/openclaw/ - -# Phase 8: FULL — tunnel + Ollama + real facilitator (~8min, needs everything) -export X402_RS_DIR=/path/to/x402-rs -go test -tags integration -v -timeout 15m \ - -run 'TestIntegration_Tunnel_RealFacilitatorOllama' \ - ./internal/openclaw/ - -# x402 verifier standalone E2E -go test -tags integration -v -timeout 10m \ - -run 'TestIntegration_PaymentGate' \ - ./internal/x402/ - -# All monetize tests -go test -tags integration -v -timeout 20m ./internal/openclaw/ -``` diff --git a/docs/monetisation-architecture-proposal.md b/docs/monetisation-architecture-proposal.md deleted file mode 100644 index 7588c935..00000000 --- a/docs/monetisation-architecture-proposal.md +++ /dev/null @@ -1,480 +0,0 @@ -# Obol Agent: Autonomous Compute Monetization - -**Branch:** `feat/secure-enclave-inference` | **Date:** 2026-02-25 | **Status:** Architecture proposal - ---- - -## 1. 
The Goal - -A singleton OpenClaw instance — the **obol-agent** — deployed via `obol agent init`, autonomously monetizes compute resources running in the Obol Stack. A user (or the frontend) declares *what* to expose via a Custom Resource; the obol-agent handles *everything else*: model pulling, health validation, payment gating, public exposure, on-chain registration, and status reporting. - -No separate controller binary. No Go operator. The obol-agent is a regular OpenClaw instance with elevated RBAC and the `monetize` skill. Only one obol-agent can exist per cluster; other OpenClaw instances retain standard read-only access. - ---- - -## 2. How It Works - -``` - ┌──────────────────────────────────┐ - │ User / Frontend / obol CLI │ - │ │ - │ kubectl apply -f offer.yaml │ - │ OR: frontend POST to k8s API │ - │ OR: obol sell http ... │ - └──────────┬───────────────────────────┘ - │ creates CR - ▼ - ┌────────────────────────────────────┐ - │ ServiceOffer CR │ - │ apiVersion: obol.org/v1alpha1 │ - │ kind: ServiceOffer │ - └──────────┬───────────────────────────┘ - │ read by - ▼ - ┌────────────────────────────────────┐ - │ obol-agent (singleton OpenClaw) │ - │ namespace: openclaw- │ - │ │ - │ Cron job (every 60s): │ - │ python3 monetize.py process --all│ - │ │ - │ `monetize` skill: │ - │ 1. Read ServiceOffer CRs │ - │ 2. Pull model (if runtime=ollama) │ - │ 3. Health-check upstream service │ - │ 4. Create ForwardAuth Middleware │ - │ 5. Create HTTPRoute │ - │ 6. Register on ERC-8004 │ - │ 7. Update CR status │ - └────────────────────────────────────┘ -``` - -The obol-agent uses its mounted ServiceAccount token to talk to the Kubernetes API — the same pattern `kube.py` already uses for read-only monitoring, but extended with write operations for Middleware and HTTPRoute resources. - -The reconciliation loop is built on OpenClaw's native **cron system**: a `{ kind: "every", everyMs: 60000 }` job runs `monetize.py process --all` every 60 seconds. 
No sidecar, no K8s CronJob — the cron scheduler runs inside the OpenClaw Gateway process and persists across pod restarts. - ---- - -## 3. Why Not a Separate Controller - -| Concern | Go operator (controller-runtime) | OpenClaw with `monetize` skill | -|---------|----------------------------------|--------------------------------| -| New binary to build/maintain | Yes — new cmd/, Dockerfile, CI | No — skill is a SKILL.md + Python script | -| Hot-updatable logic | No — rebuild + redeploy image | Yes — update skill files on PVC | -| Error handling | Hardcoded retry/backoff | AI reasons about failures, adapts | -| Watch loop | Built-in informer cache | Built-in cron: `monetize.py process --all` every 60s | -| Dependencies | controller-runtime, kubebuilder, code-gen | stdlib Python (`urllib`, `json`, `ssl`) | -| Existing infrastructure | Needs new Deployment, SA, RBAC | Uses existing OpenClaw pod, SA, skill system | - -The traditional operator pattern is the right answer when you need guaranteed sub-second reconciliation with leader election. For monetization lifecycle (deploy → expose → register → monitor), OpenClaw acting on ServiceOffer CRs via skills is simpler and leverages everything already built. - ---- - -## 4. 
The CRD - -```yaml -apiVersion: obol.org/v1alpha1 -kind: ServiceOffer -metadata: - name: qwen-inference - namespace: openclaw-default # lives alongside the OpenClaw instance -spec: - # What to serve - model: - name: Qwen/Qwen3.5-35B-A3B # Ollama model tag to pull - runtime: ollama # runtime that serves the model - - # Upstream service (Ollama already running in-cluster) - upstream: - service: ollama # k8s Service name - namespace: openclaw-default # where the service runs - port: 11434 - healthPath: /api/tags # endpoint to probe after pull - - # How to price it - pricing: - amount: "0.50" - unit: MTok # per million tokens - currency: USDC - chain: base - - # Who gets paid - wallet: "0x1234...abcd" - - # Public path - path: /services/qwen-inference - - # On-chain advertisement - register: true -``` - -```yaml -status: - conditions: - - type: ModelReady - status: "True" - reason: PullCompleted - message: "Qwen/Qwen3.5-35B-A3B pulled and loaded on ollama" - - type: UpstreamHealthy - status: "True" - reason: HealthCheckPassed - message: "Model responds to inference at ollama.openclaw-default.svc:11434" - - type: PaymentGateReady - status: "True" - reason: MiddlewareCreated - message: "ForwardAuth middleware x402-qwen-inference created" - - type: RoutePublished - status: "True" - reason: HTTPRouteCreated - message: "Exposed at /services/qwen-inference via traefik-gateway" - - type: Registered - status: "True" - reason: ERC8004Registered - message: "Registered on Base (tx: 0xabc...)" - - type: Ready - status: "True" - reason: AllConditionsMet - endpoint: "https://stack.example.com/services/qwen-inference" - observedGeneration: 1 -``` - -**Design:** -- **Namespace-scoped** — the CR lives in the same namespace as the upstream service. This preserves OwnerReference cascade (garbage collection on delete) and avoids cross-namespace complexity. 
The obol-agent's ClusterRoleBinding lets it watch ServiceOffers across all namespaces via `GET /apis/obol.org/v1alpha1/serviceoffers` (cluster-wide list). -- **Conditions, not Phase** — [deprecated by API conventions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties). Conditions give granular insight into which step failed. -- **Status subresource** — prevents users from accidentally overwriting status. ([docs](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#status-subresource)) -- **Same-namespace as upstream** — the Middleware and HTTPRoute are created alongside the upstream service. OwnerReferences work (same namespace), so deleting the ServiceOffer garbage-collects the route and middleware. ([docs](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/)) - -### CRD installation - -The CRD manifest is embedded in the infrastructure helmfile (same pattern as `obol-agent.yaml`) and applied during `obol stack init`. No kubebuilder, no code-gen — just a static YAML manifest. - ---- - -## 5. The `monetize` Skill - -``` -internal/embed/skills/monetize/ -├── SKILL.md # Teaches OpenClaw when and how to use this skill -├── scripts/ -│ └── monetize.py # K8s API client for ServiceOffer lifecycle -└── references/ - └── x402-pricing.md # Pricing strategies, chain selection -``` - -### SKILL.md (summary) - -Teaches OpenClaw: -- When a user asks to monetize a service, create a ServiceOffer CR -- When asked to check monetization status, read ServiceOffer CRs and report conditions -- When asked to process offers, run the monetization workflow (health → gate → route → register) -- When asked to stop monetizing, delete the ServiceOffer CR (garbage collection handles cleanup) - -### kube.py extension - -`kube.py` gains write helpers (`api_post`, `api_patch`, `api_delete`) alongside its existing `api_get`. 
The read-only contract is preserved by convention: `kube.py` commands remain read-only; `monetize.py` imports the shared helpers and adds write operations. Pure Python stdlib — no new dependencies. - -Why not a K8s MCP server? The mounted ServiceAccount token already gives direct API access. An MCP server (e.g., Red Hat's `containers/kubernetes-mcp-server`) adds a sidecar container, image pull, and Helm chart changes for what amounts to wrapping the same REST calls. It's a known upgrade path if K8s operations outgrow script-based tooling, but adds no value today. - -### monetize.py - -``` -python3 monetize.py offers # list ServiceOffer CRs -python3 monetize.py process <name> # run full workflow for one offer -python3 monetize.py process --all # process all pending offers -python3 monetize.py status <name> # show conditions -python3 monetize.py create <name> --upstream .. # create a ServiceOffer CR -python3 monetize.py delete <name> # delete CR (cascades cleanup) -``` - -Each `process` invocation: - -1. **Read the ServiceOffer CR** from the k8s API -2. **Pull the model** — if `spec.model.runtime == ollama`, `POST /api/pull` to Ollama -3. **Health-check** — verify model responds at `<service>.<namespace>.svc:<port>` -4. **Create/update Middleware** — Traefik ForwardAuth pointing at `x402-verifier.x402.svc:8080/verify` -5. **Create/update HTTPRoute** — `parentRef: traefik-gateway`, path from spec, backend = upstream service, filter = the Middleware -6. **ERC-8004 registration** — if `spec.register`, call `signer.py` to sign and submit the registration tx -7. **Update CR status** — set conditions and endpoint - -All via the k8s REST API using the mounted ServiceAccount token. No kubectl, no client-go, no external dependencies. - ---- - -## 6. What Gets Created Per ServiceOffer - -All resources are created in the **same namespace** as the upstream service (and the ServiceOffer CR). OwnerReferences on the ServiceOffer handle cleanup. 
- -| Resource | Purpose | -|----------|---------| -| `Middleware` (traefik.io/v1alpha1) | ForwardAuth to `x402-verifier.x402.svc:8080/verify` — gates the upstream with payment | -| `HTTPRoute` (gateway.networking.k8s.io/v1) | Routes `spec.path` from Traefik Gateway to upstream, through the Middleware | - -That's it. Two resources. The upstream service already runs. The x402 verifier already runs. The Gateway already runs. The tunnel already runs. - -### Why no new namespace - -The upstream service already has a namespace. Creating a new namespace per offer would mean: -- Cross-namespace OwnerReferences don't work ([docs](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/)) -- Need ReferenceGrant for cross-namespace backend refs in HTTPRoute ([docs](https://gateway-api.sigs.k8s.io/api-types/referencegrant/)) -- Broader RBAC (namespace create/delete permissions) - -Instead: Middleware and HTTPRoute live alongside the upstream. Delete the ServiceOffer CR → Kubernetes cascades the deletion. - -### Cross-namespace HTTPRoute → Gateway - -The HTTPRoute references `traefik-gateway` in the `traefik` namespace. No ReferenceGrant needed — the Gateway's `allowedRoutes.namespaces.from: All` handles this. ([Gateway API docs](https://gateway-api.sigs.k8s.io/guides/multiple-ns/)) - -### Middleware locality - -Traefik's `ExtensionRef` in HTTPRoute is a `LocalObjectReference` — Middleware must be in the same namespace as the HTTPRoute. The skill creates it there. ([traefik#11126](https://github.com/traefik/traefik/issues/11126)) - ---- - -## 7. 
RBAC: Singleton obol-agent vs Regular OpenClaw - -### Two tiers of access - -| | obol-agent (singleton) | Regular OpenClaw instances | -|---|---|---| -| **Deployed by** | `obol agent init` | `obol openclaw onboard` | -| **RBAC** | `openclaw-monetize` ClusterRole | Namespace-scoped read-only Role (chart default) | -| **Skills** | All default skills + `monetize` | Default skills only | -| **Cron** | `monetize.py process --all` every 60s | No monetization cron | -| **Count** | Exactly one per cluster | Zero or more | - -Only the obol-agent gets the elevated ClusterRole. `obol agent init` enforces the singleton constraint — it refuses to create a second obol-agent if one already exists. - -### obol-agent ClusterRole - -```yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: openclaw-monetize -rules: - # Read/write ServiceOffer CRs - - apiGroups: ["obol.org"] - resources: ["serviceoffers"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - - apiGroups: ["obol.org"] - resources: ["serviceoffers/status"] - verbs: ["get", "update", "patch"] - - # Create Middleware and HTTPRoute in service namespaces - - apiGroups: ["traefik.io"] - resources: ["middlewares"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - - apiGroups: ["gateway.networking.k8s.io"] - resources: ["httproutes"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - - # Read pods/services/endpoints/deployments for health checks (any namespace) - - apiGroups: [""] - resources: ["pods", "services", "endpoints"] - verbs: ["get", "list"] - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "list"] - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get"] -``` - -This is bound to OpenClaw's ServiceAccount via ClusterRoleBinding — the skill needs to read services and create routes across namespaces (e.g., check health of Ollama in `openclaw-default`, create a route for an Ethereum node in 
`ethereum-knowing-wahoo`). - -### What is explicitly NOT granted - -| Excluded | Why | -|----------|-----| -| `secrets` (cluster-wide) | OpenClaw has secrets access in its own namespace only (chart default) | -| `rbac.authorization.k8s.io/*` | Cannot modify its own permissions | -| `namespaces` create/delete | Doesn't create namespaces | -| `deployments` create/update | Doesn't create workloads — gates existing ones | -| `configmaps` create (cluster-wide) | Reads config for diagnostics, doesn't write it | - -### How this gets applied - -The ClusterRole and ClusterRoleBinding are added to the OpenClaw helmfile generation in `internal/openclaw/openclaw.go`, same as the existing `rbac.create: true` overlay. When `obol openclaw onboard` runs, the chart deploys these RBAC resources alongside the pod. - -**Ref:** [RBAC Good Practices](https://kubernetes.io/docs/concepts/security/rbac-good-practices/) - -### Fix the existing `admin` RoleBinding - -The per-network `agent-rbac.yaml` currently binds the `admin` ClusterRole, which includes Secrets and RBAC manipulation. Replace with a scoped ClusterRole (read pods/services + write Middleware/HTTPRoute). - ---- - -## 8. 
Admission Policy Guardrail - -Defense-in-depth via [ValidatingAdmissionPolicy](https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/) (GA in k8s 1.30, available in k3s 1.31): - -```yaml -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingAdmissionPolicy -metadata: - name: openclaw-monetize-guardrail -spec: - failurePolicy: Fail - matchConstraints: - resourceRules: - - apiGroups: ["traefik.io"] - apiVersions: ["v1alpha1"] - operations: ["CREATE", "UPDATE"] - resources: ["middlewares"] - - apiGroups: ["gateway.networking.k8s.io"] - apiVersions: ["v1"] - operations: ["CREATE", "UPDATE"] - resources: ["httproutes"] - matchConditions: - - name: is-openclaw - expression: >- - request.userInfo.username.startsWith("system:serviceaccount:openclaw-") - validations: - # HTTPRoutes must reference traefik-gateway only - - expression: >- - object.spec.parentRefs.all(ref, - ref.name == "traefik-gateway" && ref.?namespace.orValue("traefik") == "traefik" - ) - message: "OpenClaw can only attach routes to traefik-gateway" - # Middlewares must use ForwardAuth to x402-verifier only - - expression: >- - !has(object.spec.forwardAuth) || - object.spec.forwardAuth.address.startsWith("http://x402-verifier.x402.svc") - message: "ForwardAuth must point to x402-verifier" -``` - -Even if RBAC allows creating any Middleware, the admission policy ensures OpenClaw can only create ForwardAuth rules pointing at the legitimate x402 verifier. A prompt injection can't make it route traffic to an attacker-controlled auth endpoint. - ---- - -## 9. The Full Flow - -``` -1. User: "Monetize Qwen3.5-35B-A3B on Ollama at $0.50 per M token on Base" - -2. OpenClaw (using monetize skill) creates the ServiceOffer CR: - python3 monetize.py create qwen-inference \ - --model Qwen/Qwen3.5-35B-A3B --runtime ollama \ - --upstream ollama --namespace openclaw-default --port 11434 \ - --price 0.50 --unit MTok --chain base --wallet 0x... 
--register - → Creates ServiceOffer CR via k8s API - -3. OpenClaw processes the offer: - python3 monetize.py process qwen-inference - - Step 1: Pull the model through Ollama - POST http://ollama.openclaw-default.svc:11434/api/pull - {"name": "Qwen/Qwen3.5-35B-A3B"} - → Streams download progress, waits for completion - → sets condition: ModelReady=True - - Step 2: Health-check the model is loaded - POST http://ollama.openclaw-default.svc:11434/api/generate - {"model": "Qwen/Qwen3.5-35B-A3B", "prompt": "ping", "stream": false} - → 200 OK, model responds - → sets condition: UpstreamHealthy=True - - Step 3: Create ForwardAuth Middleware - POST /apis/traefik.io/v1alpha1/namespaces/openclaw-default/middlewares - → ForwardAuth → x402-verifier.x402.svc:8080/verify - → sets condition: PaymentGateReady=True - - Step 4: Create HTTPRoute - POST /apis/gateway.networking.k8s.io/v1/namespaces/openclaw-default/httproutes - → parentRef: traefik-gateway, path: /services/qwen-inference - → filter: ExtensionRef to Middleware - → backendRef: ollama:11434 - → sets condition: RoutePublished=True - - Step 5: ERC-8004 registration - python3 signer.py ... (signs registration tx) - → sets condition: Registered=True - - Step 6: Update status - PATCH /apis/obol.org/v1alpha1/.../serviceoffers/qwen-inference/status - → Ready=True, endpoint=https://stack.example.com/services/qwen-inference - -4. User: "What's the status?" - python3 monetize.py status qwen-inference - → Shows conditions table + endpoint + model info - -5. External consumer pays and calls: - POST https://stack.example.com/services/qwen-inference/v1/chat/completions - X-Payment: - → Traefik → ForwardAuth (x402-verifier) → Ollama (Qwen3.5-35B-A3B) -``` - ---- - -## 10. 
What the `obol` CLI Does - -The CLI becomes a thin CRD client — no deployment logic, no helmfile: - -```bash -obol sell http --upstream ollama --price 0.001 --chain base -# → creates ServiceOffer CR (same as kubectl apply) - -obol sell list -# → kubectl get serviceoffers (formatted) - -obol sell status qwen-inference -# → shows conditions, endpoint, pricing - -obol sell delete qwen-inference -# → deletes CR (OwnerReference cascades cleanup) -``` - -The frontend can do the same via the k8s API directly. - ---- - -## 11. What We Keep, What We Drop, What We Add - -| Component | Action | Reason | -|-----------|--------|--------| -| `cmd/x402-verifier/` | **Keep** | ForwardAuth verifier — the payment gate | -| `internal/x402/` | **Keep** | Verifier handler | -| `internal/erc8004/` | **Keep** | On-chain registration (called by `monetize.py` via `signer.py`) | -| `internal/enclave/` | **Keep** | Secure Enclave signing (orthogonal to monetization) | -| `internal/inference/gateway.go` | **Drop** | Inline x402 middleware — replaced by ForwardAuth | -| `internal/inference/store.go` | **Drop** | Deployment config on disk — replaced by CRD | -| `obol-agent.yaml` (busybox pod) | **Drop** | OpenClaw IS the agent; no separate placeholder pod | -| `agent-rbac.yaml` (`admin` binding) | **Replace** | Scoped ClusterRole instead of `admin` | -| `cmd/obol/service.go` | **Simplify** | Thin CRD client | -| `cmd/obol/monetize.go` | **Simplify** | Thin CRD client | -| `internal/embed/skills/monetize/` | **Add** | New skill: SKILL.md + `monetize.py` + references | -| ServiceOffer CRD manifest | **Add** | Intent interface, applied during `obol stack init` | -| ValidatingAdmissionPolicy | **Add** | Guardrail on what OpenClaw can create | -| `openclaw-monetize` ClusterRole | **Add** | Scoped write access for Middleware/HTTPRoute | - ---- - -## 12. 
Resolved Decisions - -| Question | Decision | Rationale | -|----------|----------|-----------| -| **Polling vs event-driven** | OpenClaw cron job, every 60s | OpenClaw has a built-in cron scheduler (`{ kind: "every", everyMs: 60000 }`). No sidecar, no K8s CronJob — runs inside the Gateway process. Jobs persist across restarts via `~/.openclaw/cron/jobs.json`. | -| **Multi-instance** | Singleton obol-agent | Only one obol-agent per cluster, enforced by `obol agent init`. Other OpenClaw instances keep read-only RBAC and no `monetize` skill. No coordination problem. | -| **CRD scope** | Namespace-scoped | OwnerReference cascade works (same namespace as Middleware/HTTPRoute). The obol-agent's ClusterRoleBinding lets it list ServiceOffers across all namespaces. Standard `kubectl get serviceoffers -A` works. | -| **K8s API access** | Extend `kube.py` with write helpers | `kube.py` gains `api_post`, `api_patch`, `api_delete` alongside `api_get`. `monetize.py` imports the shared helpers. Pure stdlib, zero new dependencies. K8s MCP server (Red Hat `containers/kubernetes-mcp-server`) is a known upgrade path but unnecessary today. 
| - ---- - -## References - -| Topic | Link | -|-------|------| -| Custom Resource Definitions | https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/ | -| CRD status subresource | https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#status-subresource | -| API conventions (conditions) | https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md | -| RBAC | https://kubernetes.io/docs/reference/access-authn-authz/rbac/ | -| RBAC good practices | https://kubernetes.io/docs/concepts/security/rbac-good-practices/ | -| ValidatingAdmissionPolicy | https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/ | -| OwnerReferences | https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/ | -| Cross-namespace routing (Gateway API) | https://gateway-api.sigs.k8s.io/guides/multiple-ns/ | -| ReferenceGrant | https://gateway-api.sigs.k8s.io/api-types/referencegrant/ | -| Accessing API from a pod | https://kubernetes.io/docs/tasks/run-application/access-api-from-pod/ | -| Pod Security Standards | https://kubernetes.io/docs/concepts/security/pod-security-standards/ | -| Service account tokens | https://kubernetes.io/docs/concepts/security/service-accounts/ | -| Traefik ForwardAuth | https://doc.traefik.io/traefik/reference/routing-configuration/http/middlewares/forwardauth/ | -| Traefik Middleware locality | https://github.com/traefik/traefik/issues/11126 | diff --git a/docs/plans/buy-side-testing.md b/docs/plans/buy-side-testing.md deleted file mode 100644 index 39bfc260..00000000 --- a/docs/plans/buy-side-testing.md +++ /dev/null @@ -1,214 +0,0 @@ -# Buy-Side x402 Hands-Off Testing Plan - -## Current State - -- All clusters are down, no k3d containers running -- x402 extension (`x402.py`) created in LiteLLM fork, registered in `__init__.py` -- `buy-inference` skill created: `buy.py` + `SKILL.md` + 
`references/x402-buyer-api.md` -`buy_side_test.go` exists but bypasses LiteLLM (sends directly to mock seller) -LiteLLM Docker image `latest` includes x402 extension - -## Gaps (ordered by dependency) - -### Gap 0: LiteLLM image with x402 extension - -**Problem**: The LiteLLM Docker image needs to include the x402 extension for buy-side payments. - -**Fix**: -1. Ensure `internal/embed/infrastructure/base/templates/llm.yaml` references the correct LiteLLM image tag -2. The LiteLLM image should include x402 extension support -3. Update `llm.yaml` to use the correct version if needed - -**Verification**: `docker run --rm litellm python -c "from litellm.extensions.providers.x402 import install_x402; print('ok')"` (if applicable) - --- - -### Gap 1: No test routes through LiteLLM x402 extension - -**Problem**: `buy_side_test.go` patches the ConfigMap but sends the paid request directly to the mock seller at `http://127.0.0.1:<port>`. The critical path — LiteLLM receiving a request, the x402 extension signing via remote-signer, injecting `X-PAYMENT`, forwarding to the seller — is never exercised. - -**Fix**: Add a new integration test `TestIntegration_BuySide_ThroughLiteLLM` that: - -1. Starts mock x402 seller on host (reuse `startMockX402Seller`) -2. Patches `litellm-config` ConfigMap with x402 provider pointing at mock seller -3. Restarts litellm deployment to force immediate reload (not wait 120s) -4. Port-forwards litellm:4000 to localhost -5. Sends a chat request to litellm with the purchased model name (e.g., `test-buy-x402/test-model`) -6. litellm routes to `X402Provider.chat()` → signs via remote-signer → injects X-PAYMENT → forwards to mock seller -7. Asserts: mock seller received the X-PAYMENT header, response is 200 with inference data - -**Requires**: Running cluster with litellm + remote-signer (from `obol openclaw onboard`) - -**Key detail**: The mock seller must be reachable from inside the cluster. 
Use `testutil.ClusterHostIP(t)` (resolves to `host.k3d.internal` or `host.docker.internal`). Listen on `0.0.0.0` (already done in `startMockX402Seller`). - --- - -### Gap 2: No mock remote-signer for isolated testing - -**Problem**: The x402 extension calls `POST remote-signer:9000/api/v1/sign/{addr}/typed-data`. In a full cluster, the real remote-signer handles this. But for faster/lighter tests, we have no mock. - -**Fix**: Add `testutil.StartMockRemoteSigner(t, privateKeyHex)` to provide a mock remote-signer that: - -1. Listens on `0.0.0.0:<port>` -2. `GET /api/v1/keys` → returns `{"keys": ["<address>
"]}` -3. `GET /healthz` → returns `{"status": "ok"}` -4. `POST /api/v1/sign/{addr}/typed-data` → uses `go-ethereum` crypto to sign EIP-712 typed data with the provided private key → returns `{"signature": "0x..."}` - -**Why**: Enables testing the LiteLLM x402 extension → remote-signer path without deploying the Rust remote-signer binary. Also enables testing `buy.py` commands (`balance` excepted) without a full cluster. - -**Scope**: ~80 lines Go. Reuses `testutil.eip712_signer.go` for signing logic. - -**Priority**: NICE-TO-HAVE for first test pass. The real remote-signer works fine in-cluster. Only needed if we want to test without a full cluster later. - ---- - -### Gap 3: buy.py skill not smoke-tested in-pod - -**Problem**: `buy.py` imports from sibling skills (`kube.py`, `signer.py`) via `sys.path.insert`. This works in theory (same pattern as `monetize.py`) but has never been tested in an actual pod where the skills are deployed at `/data/.openclaw/skills/`. - -**Fix**: Add a smoke test to verify the buy-inference skill loads correctly in-pod: - -```python -def test_buy_inference_help(): - """buy-inference skill loads and prints help.""" - result = subprocess.run( - ["python3", "/data/.openclaw/skills/buy-inference/scripts/buy.py", "--help"], - capture_output=True, text=True, timeout=10, - ) - assert result.returncode == 0 - assert "probe" in result.stdout - assert "buy" in result.stdout -``` - -**Scope**: 10 lines. - ---- - -### Gap 4: `llm.yaml` image tag configuration - -**Problem**: `internal/embed/infrastructure/base/templates/llm.yaml` needs to reference the correct LiteLLM image with x402 support. - -**Fix**: Ensure the LiteLLM deployment in `llm.yaml` uses the correct image tag: -```yaml -image: litellm:latest # or appropriate version with x402 support -``` - -**Scope**: Verify image references in llm.yaml are correct. - ---- - -## Testing Sequence - -### Phase 1: Build & Push (pre-cluster) - -``` -1. 
Ensure LiteLLM image with x402 extension is available (Gap 0) -2. Update llm.yaml image tag (Gap 4) -3. Build obol binary from worktree -4. Verify: go build ./... && go test ./... && go vet -tags integration ./internal/x402/ -``` - -### Phase 2: Cluster Up - -``` -5. OBOL_DEVELOPMENT=true obol stack init && obol stack up -6. obol openclaw onboard (deploys remote-signer + agent) -7. Verify: kubectl get pods -n llm (litellm Running) -8. Verify: kubectl get pods -n openclaw-obol-agent (remote-signer Running) -``` - -### Phase 3: Buy Skill Smoke Test - -``` -9. kubectl exec -n openclaw-obol-agent deploy/openclaw -- \ - python3 /data/.openclaw/skills/buy-inference/scripts/buy.py --help -10. kubectl exec -n openclaw-obol-agent deploy/openclaw -- \ - python3 /data/.openclaw/skills/buy-inference/scripts/buy.py list - (expect: "No purchased x402 providers.") -``` - -### Phase 4: Manual Buy-Side Walkthrough - -``` -11. Start mock seller on host: - go test -tags integration -v -run TestIntegration_BuySide_ProbeAndPurchase -timeout 10m ./internal/x402/ - (or start a real seller via: obol sell inference on another cluster) - -12. From inside the agent pod, run probe: - kubectl exec -n openclaw-obol-agent deploy/openclaw -- \ - python3 /data/.openclaw/skills/buy-inference/scripts/buy.py probe \ - http://host.k3d.internal:/v1/chat/completions - (expect: 402 pricing output) - -13. From inside the agent pod, run buy: - kubectl exec -n openclaw-obol-agent deploy/openclaw -- \ - python3 /data/.openclaw/skills/buy-inference/scripts/buy.py buy test-seller \ - --endpoint http://host.k3d.internal: \ - --model test-model --budget 10000 - (expect: provider added to litellm-config) - -14. Wait 2 min for ConfigMap reload, or force: - kubectl rollout restart -n llm deploy/litellm - kubectl rollout status -n llm deploy/litellm --timeout=60s - -15. Verify model appears in litellm: - kubectl exec -n llm deploy/litellm -- curl -s http://localhost:4000/models | jq . - -16. 
Send inference through litellm using purchased model: - kubectl exec -n llm deploy/litellm -- curl -s -X POST http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model":"test-seller/test-model","messages":[{"role":"user","content":"hello"}]}' - (expect: x402 extension signs payment, forwards to seller, returns 200) - -17. Check seller received X-PAYMENT header (from test logs or mock seller output) - -18. Cleanup: - kubectl exec -n openclaw-obol-agent deploy/openclaw -- \ - python3 /data/.openclaw/skills/buy-inference/scripts/buy.py remove test-seller -``` - -### Phase 5: Integration Test (automated) - -``` -19. Run the through-litellm integration test (Gap 1): - go test -tags integration -v -run TestIntegration_BuySide_ThroughLiteLLM -timeout 10m ./internal/x402/ - -20. Run existing buy-side tests: - go test -tags integration -v -run TestIntegration_BuySide -timeout 10m ./internal/x402/ -``` - -### Phase 6: Full Hands-Off (OpenClaw agent does it autonomously) - -``` -21. Trigger OpenClaw heartbeat with a task that exercises the buy skill: - "Discover x402 inference sellers, probe the first one, buy access if the price - is under 10000 micro-units, then send a test message through the purchased model." - -22. Watch logs for ~5 min: - kubectl logs -n openclaw-obol-agent deploy/openclaw -f - -23. Verify: the agent probed, bought, and used a remote model autonomously -``` - -## Minimal Critical Path - -If time is limited, the absolute minimum to verify the buy lifecycle works: - -1. **Gap 0** — ensure LiteLLM image with x402 extension is available (BLOCKER) -2. **Gap 4** — update image tag in llm.yaml (BLOCKER) -3. Build obol binary, bring up cluster, onboard openclaw -4. Start mock seller on host -5. Run `buy.py probe` + `buy.py buy` from agent pod -6. Restart litellm, send request through purchased model -7. 
Verify 200 response with X-PAYMENT header at seller - -Everything else (Gap 1 automated test, Gap 2 mock signer, Gap 3 smoke test) can follow after the manual walkthrough confirms the flow works. - -## Files to Modify - -| File | Change | Gap | -|------|--------|-----| -| `internal/embed/infrastructure/base/templates/llm.yaml` | Verify LiteLLM image tag | 4 | -| `internal/x402/buy_side_test.go` | Add `TestIntegration_BuySide_ThroughLiteLLM` | 1 | -| `internal/testutil/mock_signer.go` | New: mock remote-signer | 2 | -| `tests/skills_smoke_test.py` | Add buy-inference smoke test | 3 | diff --git a/docs/plans/cli-agent-readiness.md b/docs/plans/cli-agent-readiness.md deleted file mode 100644 index 90a5d7aa..00000000 --- a/docs/plans/cli-agent-readiness.md +++ /dev/null @@ -1,307 +0,0 @@ -# CLI Agent-Readiness Optimizations - -## Status - -**Implemented (this branch)**: -- Phase 1: Global `--output json` / `-o json` / `OBOL_OUTPUT=json` flag -- Phase 1: `OutputMode` + `IsJSON()` + `JSON()` on `internal/ui/UI` -- Phase 1: 11 commands refactored with typed JSON results (sell list/status/info, network list, model status/list, version, update, openclaw list, tunnel status) -- Phase 1: Human output redirected to stderr in JSON mode (stdout is clean JSON) -- Phase 2: `internal/validate/` package (Name, Namespace, WalletAddress, ChainName, Price, URL, Path, NoControlChars) -- Phase 2: Headless prompt paths — `Confirm`, `Select`, `Input`, `SecretInput` auto-resolve defaults in non-TTY/JSON mode -- Phase 2: `sell delete` migrated from raw `fmt.Scanln` to `u.Confirm()` -- Phase 6: `CONTEXT.md` — agent-facing context document - -- Phase 1D: `--from-json` on sell http, sell pricing, network add (`cmd/obol/input.go` helper) -- Phase 2B: `validate.Name()` wired into sell inference/http/stop/delete, `validate.URL()` in network add -- Phase 2C: model.go `promptModelPull()` migrated from bufio to `u.Select()`/`u.Input()`, openclaw onboard headless via `u.IsTTY() && !u.IsJSON()` - 
-**Deferred to follow-up**: -- Phase 3: `obol describe` schema introspection -- Phase 4: `--fields` field filtering -- Phase 5: `--dry-run` for mutating commands -- Phase 7: MCP surface (`obol mcp`) - -## Context - -The obol CLI is increasingly consumed by AI agents — Claude Code during development, OpenClaw agents in-cluster, and soon MCP clients. Today the CLI is human-optimized: colored output, spinners, interactive prompts, and hand-formatted tables. Agents need structured output, non-interactive paths, input hardening, and runtime introspection. This plan makes the CLI agent-ready while preserving human DX. - -**Strengths**: `internal/ui/` abstraction with TTY detection, `OutputMode` (human/json), `--verbose`/`--quiet`/`--output` global flags, `internal/schemas/` with JSON-tagged Go types, `internal/validate/` for input validation, `--force` pattern for non-interactive destructive ops, 23 SKILL.md files shipped in `internal/embed/skills/`, `CONTEXT.md` for agent consumption. - -**Remaining gaps**: `--from-json` for structured input, some `fmt.Printf` calls still bypass UI layer, `model.go` interactive prompts not fully migrated, `openclaw onboard` still hardwired `Interactive: true`, no schema introspection, no `--dry-run`, no field filtering, no MCP surface. - ---- - -## Phase 1: Global `--output json` + Raw JSON Input - -Structured output is table stakes. Raw JSON input (`--from-json`) is first-class — agents shouldn't have to translate nested structures into 15+ flags. - -### 1A. Extend UI struct with output mode - -**`internal/ui/ui.go`** — Add `OutputMode` type (`human`|`json`) and field to `UI` struct. Add `NewWithAllOptions(verbose, quiet bool, output OutputMode)`. Add `IsJSON() bool`. - -**`internal/ui/output.go`** — Add `JSON(v any) error` method that writes to stdout via `json.NewEncoder`. 
When `IsJSON()` is true, redirect `Info`/`Success`/`Detail`/`Print`/`Printf`/`Dim`/`Bold`/`Blank` to stderr (so agents get clean JSON on stdout, diagnostics on stderr). Suppress spinners in JSON mode. - -### 1B. Add global `--output` flag - -**`cmd/obol/main.go`** (lines 110-127) — Add `--output` / `-o` flag (`human`|`json`, env `OBOL_OUTPUT`, default `human`). Wire in `Before` hook to pass to `ui.NewWithAllOptions`. - -### 1C. Refactor commands to return typed results - -Don't just bolt JSON onto existing `fmt.Printf` calls. Refactor high-value commands to return typed data first, then format for human or JSON. This pays off twice: clean JSON output now, and reusable typed results for MCP later. - -**Audit note**: Raw `fmt.Printf` output is spread across `main.go:460` (version), `model.go:286` (tables), `network.go:188` (tables), and throughout `sell.go`. Each needs a return-data-then-format refactor. - -| Command | Strategy | Effort | -|---------|----------|--------| -| `sell list` | Switch kubectl arg from `-o wide` to `-o json` | Trivial | -| `sell status ` | Switch kubectl arg from `-o yaml` to `-o json` | Trivial | -| `sell status` (global) | Marshal `PricingConfig` + `store.List()` — currently raw `fmt.Printf` at `sell.go:463-498` | Medium | -| `sell info` | Already has `--json` (`sell.go:841`) — wire to global flag, deprecate local | Trivial | -| `network list` | `ListRPCNetworks()` returns `[]RPCNetworkInfo` — marshal it, but local node output also uses `fmt.Printf` at `network.go:188` | Medium | -| `model status` | Return provider status map as JSON — currently `fmt.Printf` tables at `model.go:286` | Medium | -| `model list` | `ListOllamaModels()` returns structured data | Low | -| `version` | `BuildInfo()` returns a string today — refactor to struct with fields (version, commit, date, go version) | Medium | -| `update` | Already has `--json` (`update.go:20`); wire to global flag, deprecate local | Trivial | -| `openclaw list` | Refactor to return data 
before formatting | Medium | -| `tunnel status` | Refactor to return data before formatting | Medium | - -### 1D. Raw JSON input (`--from-json`) - -Add `--from-json` flag to all commands that create resources. Accepts file path or `-` for stdin. Unmarshals into existing `internal/schemas/` types, validates, creates manifest. This is first-class, not an afterthought. - -| Command | Schema Type | Flags Bypassed | -|---------|-------------|----------------| -| `sell http` | `schemas.ServiceOfferSpec` | 15+ flags (wallet, chain, price, upstream, port, namespace, health-path, etc.) | -| `sell inference` | `schemas.ServiceOfferSpec` | 10+ flags | -| `sell pricing` | `schemas.PaymentTerms` | wallet, chain, facilitator | -| `network add` | New `RPCConfig` type | endpoint, chain-id, allow-writes | - -### Testing -- `internal/ui/ui_test.go`: OutputMode switching, JSON writes valid JSON to stdout, human methods go to stderr in JSON mode -- `cmd/obol/output_test.go`: `--output json` on each migrated command produces parseable JSON -- `cmd/obol/json_input_test.go`: `--from-json` with valid/invalid specs - ---- - -## Phase 2: Input Validation + Headless Paths - -Agents hallucinate inputs and can't answer interactive prompts. Fix both together. - -### 2A. New validation package - -**`internal/validate/validate.go`** (new) - -``` -Name(s) — k8s-safe: [a-z0-9][a-z0-9.-]*, no path traversal -Namespace(s) — same rules as Name -WalletAddress(s) — reuse x402verifier.ValidateWallet() pattern -ChainName(s) — from known set (base, base-sepolia, etc.) -Path(s) — no .., no %2e%2e, no control chars -Price(s) — valid decimal, positive -URL(s) — parseable, no control chars -NoControlChars(s) — reject \x00-\x1f except \n\t -``` - -### 2B. 
Wire into commands - -Add validation at the top of every action handler for positional args and key flags: -- **`cmd/obol/sell.go`**: name, wallet, chain, path, price, namespace, upstream URL -- **`cmd/obol/network.go`**: network name, custom RPC URL, chain ID -- **`cmd/obol/model.go`**: provider name, endpoint URL -- **`cmd/obol/openclaw.go`**: instance ID - -### 2C. Headless paths for interactive flows - -**`internal/ui/prompt.go`** — When `IsJSON() || !IsTTY()`: -- `Confirm` → return default value (no stdin read) -- `Select` → return error: "interactive selection unavailable; use --provider flag" -- `Input` → return default or error if no default -- `SecretInput` → return error: "use --api-key flag" - -**`cmd/obol/openclaw.go`** (line 36) — `openclaw onboard` is hardwired `Interactive: true`. Add a non-interactive path when all required flags are provided (`--id`, plus any other required inputs). Only fall through to interactive mode when flags are missing AND stdin is a TTY. - -**`cmd/obol/model.go`** (lines 62-84) — `model setup` enters interactive selection when `--provider` is omitted. In non-TTY/JSON mode, error with required flags instead. - -**`cmd/obol/model.go`** (lines 387-419) — `model pull` uses `bufio.NewReader(os.Stdin)` for interactive model selection. Same treatment. - -**`cmd/obol/sell.go`** (line 576-588) — `sell delete` confirmation uses raw `fmt.Scanln`. Migrate to `u.Confirm()` so the headless path is automatic. - -### Testing -- `internal/validate/validate_test.go`: Table-driven tests for path traversal variants, control char injection, valid inputs -- Test that `--output json` + missing required flags → clear error (not a hung prompt) -- Test that `openclaw onboard --id test -o json` works without interactive mode - ---- - -## Phase 3: Schema Introspection (`obol describe`) - -Let agents discover what the CLI accepts at runtime without parsing `--help` text. - -### 3A. 
Add `obol describe` command - -**`cmd/obol/describe.go`** (new) - -``` -obol describe # list all commands + flags as JSON -obol describe sell http # flags + ServiceOffer schema for that command -obol describe --schemas # dump resource schemas only -``` - -Walk urfave/cli's `*cli.Command` tree. For each command, emit: name, usage, flags (name, type, required, default, env vars, aliases), ArgsUsage. Output always JSON. - -### 3B. Schema registry - -**`internal/schemas/registry.go`** (new) — Map of schema names to JSON Schema generated from Go struct tags via `reflect`. Schemas: `ServiceOfferSpec`, `PaymentTerms`, `PriceTable`, `RegistrationSpec`. - -### 3C. Command metadata annotations - -Add `Metadata: map[string]any{"schema": "ServiceOfferSpec", "mutating": true}` to commands that create resources (sell http, sell inference, sell pricing). `obol describe` reads this and includes the schema in output. - -### Testing -- `cmd/obol/describe_test.go`: Valid JSON output, every command appears, schemas resolve, flag metadata matches actual flags - ---- - -## Phase 4: `--fields` Support - -Let agents limit response size to protect their context window. - -### 4A. Field mask implementation - -**`internal/ui/fields.go`** (new) — `FilterFields(data any, fields []string) any` using reflect on JSON tags. - -### 4B. Global `--fields` flag - -**`cmd/obol/main.go`** — Global `--fields` flag (comma-separated, requires `--output json`). Applied in `u.JSON()` before encoding. - -### Testing -- `--fields name,status` on `sell list -o json` returns only those fields -- `--fields` without `--output json` returns error - ---- - -## Phase 5: `--dry-run` for Mutating Commands - -Let agents validate before mutating. Safety rail. - -### 5A. Global `--dry-run` flag - -**`cmd/obol/main.go`** — Add `--dry-run` bool flag. - -### 5B. 
Priority commands - -| Command | Implementation | -|---------|---------------| -| `sell http` | Already builds manifest before `kubectlApply()` — return manifest instead of applying | -| `sell pricing` | Validate wallet/chain, show what would be written to ConfigMap | -| `network add` | Validate chain, show which RPCs would be added to eRPC config | -| `sell delete` | Validate name exists, show what would be deleted | - -Pattern: after validation, before execution, check `cmd.Root().Bool("dry-run")` and return a `DryRunResult{Command, Valid, WouldCreate, Manifest}` as JSON. - -### Testing -- `cmd/obol/dryrun_test.go`: `--dry-run sell http` returns manifest without kubectl apply, validation still runs in dry-run - ---- - -## Phase 6: Agent Context & Skills - -The 23 SKILL.md files are a strength, but there's no top-level `CONTEXT.md` encoding invariants agents can't intuit from `--help`. - -### 6A. Ship `CONTEXT.md` - -**`CONTEXT.md`** (repo root, also embedded in binary) — Agent-facing context file encoding: -- Always use `--output json` when parsing output programmatically -- Always use `--force` for non-interactive destructive operations -- Always use `--fields` on list commands to limit context window usage -- Always use `--dry-run` before mutating operations -- Use `obol describe ` to introspect flags and schemas -- Cluster commands require `OBOL_CONFIG_DIR` or a running stack (`obol stack up`) -- Payment wallet addresses must be 0x-prefixed, 42 chars -- Chain names: `base`, `base-sepolia` (not CAIP-2 format) - -### 6B. Update existing skills - -Review and update the 23 SKILL.md files to reference the new agent-friendly flags where relevant (e.g., the `sell` skill should mention `--from-json` and `--dry-run`). - ---- - -## Phase 7: MCP Surface (`obol mcp`) - -Expose the CLI as typed JSON-RPC tools over stdio. Depends on all previous phases. - -### 7A. 
New package `internal/mcp/` - -- `server.go` — MCP server over stdio using `github.com/mark3labs/mcp-go` -- `tools.go` — Tool definitions from the typed result functions built in Phase 1C (not by shelling out with `--output json`) -- `handlers.go` — Tool handlers that call the refactored return-typed-data functions directly - -### 7B. `obol mcp` command - -**`cmd/obol/mcp.go`** (new) — Starts MCP server. Exposes high-value tools only: -- sell: `sell_http`, `sell_list`, `sell_status`, `sell_pricing`, `sell_delete` -- network: `network_list`, `network_add`, `network_remove`, `network_status` -- model: `model_status`, `model_list`, `model_setup` -- openclaw: `openclaw_list`, `openclaw_onboard` -- utility: `version`, `update`, `tunnel_status` - -Excludes: kubectl/helm/k9s passthroughs, interactive-only commands, dangerous ops (stack purge/down). - -### Testing -- `internal/mcp/mcp_test.go`: Tool registration produces valid MCP definitions, stdin/stdout JSON-RPC round-trip - ---- - -## Key Files Summary - -| File | Changes | -|------|---------| -| `internal/ui/ui.go` | Add OutputMode, IsJSON(), NewWithAllOptions() | -| `internal/ui/output.go` | Add JSON() method, stderr redirect in JSON mode | -| `internal/ui/prompt.go` | Non-interactive behavior when JSON/non-TTY | -| `internal/ui/fields.go` | New — field mask filtering | -| `cmd/obol/main.go` | `--output`, `--dry-run`, `--fields` global flags + Before hook | -| `cmd/obol/sell.go` | JSON output, typed results, input validation, dry-run, --from-json, migrate Scanln to u.Confirm | -| `cmd/obol/network.go` | JSON output, typed results, input validation | -| `cmd/obol/model.go` | JSON output, typed results, input validation, headless paths | -| `cmd/obol/openclaw.go` | JSON output, typed results, input validation, headless onboard path | -| `cmd/obol/update.go` | Wire to global --output flag, deprecate local --json | -| `cmd/obol/describe.go` | New — schema introspection command | -| `cmd/obol/mcp.go` | New — `obol mcp` 
command | -| `internal/validate/validate.go` | New — input validation functions | -| `internal/schemas/registry.go` | New — JSON Schema generation from Go types | -| `internal/mcp/` | New package — MCP server, tools, handlers | -| `CONTEXT.md` | New — agent-facing context file | - -## Verification - -```bash -# Phase 1: JSON output + JSON input -obol sell list -o json | jq . -obol sell status -o json | jq . -obol version -o json | jq . -obol network list -o json | jq . -echo '{"upstream":{"service":"ollama","namespace":"llm","port":11434},...}' | obol sell http test --from-json - - -# Phase 2: Input validation + headless -obol sell http '../etc/passwd' --wallet 0x... --chain base-sepolia # should error -obol sell http 'valid-name' --wallet 'not-a-wallet' --chain base-sepolia # should error -echo '' | obol model setup -o json # should error with "use --provider flag", not hang - -# Phase 3: Schema introspection -obol describe | jq '.commands | length' -obol describe sell http | jq '.schema' - -# Phase 4: Fields -obol sell list -o json --fields name,namespace,status | jq . - -# Phase 5: Dry-run -obol sell http test-svc --wallet 0x... --chain base-sepolia --dry-run -o json | jq . - -# Phase 7: MCP -echo '{"jsonrpc":"2.0","method":"tools/list","id":1}' | obol mcp - -# Unit tests -go test ./internal/ui/ ./internal/validate/ ./internal/schemas/ ./internal/mcp/ ./cmd/obol/ -``` diff --git a/docs/plans/multi-network-sell.md b/docs/plans/multi-network-sell.md deleted file mode 100644 index 77b6bf01..00000000 --- a/docs/plans/multi-network-sell.md +++ /dev/null @@ -1,387 +0,0 @@ -# Multi-Network Sell Command + UX Improvements - -## Context - -The `obol sell` command currently only supports ERC-8004 registration on Base Sepolia, requires manual private key management via `--private-key-file`, and forces users to specify all flags explicitly. We want to: - -1. Support 3 registration networks: **base-sepolia**, **base**, **ethereum mainnet** -2. 
Support **multi-chain** registration: `--chain mainnet,base` registers on both, best-effort -3. Use the **remote-signer** for all signing (not private key extraction) — EIP-712 typed data + transaction signing via its REST API -4. Use **sponsored registration** (zero gas) on ethereum mainnet via howto8004.com -5. Use the **local eRPC** (`localhost/rpc`) for chain access instead of public RPCs -6. Add **interactive prompts** using `charmbracelet/huh` with good defaults -7. **Auto-discover** the remote-signer wallet address -8. Add **ethereum mainnet** as a valid x402 payment chain - -Frontend deferred to follow-up PR. EIP-7702 handled server-side by sponsor — no CLI implementation needed. - -### Network Matrix - -| Network | x402 Payment | x402 Facilitator | ERC-8004 Registration | Sponsored Reg | -|---------|-------------|-------------------|----------------------|---------------| -| base-sepolia | Yes | `facilitator.x402.rs` | Yes (direct tx via remote-signer) | No | -| base | Yes | `x402.gcp.obol.tech` | Yes (direct tx via remote-signer) | No | -| ethereum | Yes (no facilitator yet) | TBD | Yes | Yes (`sponsored.howto8004.com/api/register`) | - ---- - -## Phase 1: Multi-Network ERC-8004 Registry Config - -### `internal/erc8004/networks.go` (new) - -```go -type NetworkConfig struct { - Name string // "base-sepolia", "base", "ethereum" - ChainID int64 - RegistryAddress string // per-chain registry address - SponsorURL string // empty if no sponsor - DelegateAddress string // EIP-7702 delegate (for sponsored flow) - ERPCNetwork string // eRPC path segment: "base-sepolia", "base", "mainnet" -} - -func ResolveNetwork(name string) (NetworkConfig, error) -func ResolveNetworks(csv string) ([]NetworkConfig, error) // "mainnet,base" → []NetworkConfig -func SupportedNetworks() []NetworkConfig -``` - -Three entries: -- `base-sepolia`: chainID 84532, registry `0x8004A818BFB912233c491871b3d84c89A494BD9e`, eRPC `base-sepolia` -- `base`: chainID 8453, registry TBD (confirm 
CREATE2 address), eRPC `base` -- `ethereum` / `mainnet`: chainID 1, registry `0x8004A169FB4a3325136EB29fA0ceB6D2e539a432`, sponsor `https://sponsored.howto8004.com/api/register`, delegate `0x77fb3D2ff6dB9dcbF1b7E0693b3c746B30499eE8`, eRPC `mainnet` - -RPC URL is **not** in NetworkConfig — always use local eRPC at `http://localhost/rpc/{ERPCNetwork}` (from host via k3d port mapping). - -### `internal/erc8004/client.go` - -- Add `NewClientForNetwork(ctx, rpcBaseURL string, net NetworkConfig) (*Client, error)` — constructs RPC URL as `rpcBaseURL + "/" + net.ERPCNetwork`, uses `net.RegistryAddress` -- Keep `NewClient(ctx, rpcURL)` as backward-compat wrapper - -### Files -- `internal/erc8004/networks.go` (new) -- `internal/erc8004/networks_test.go` (new) -- `internal/erc8004/client.go` (add `NewClientForNetwork`) - ---- - -## Phase 2: Remote-Signer Integration for Registration - -### Architecture - -The remote-signer REST API at port 9000 already supports: -- `POST /api/v1/sign/{address}/transaction` — sign raw transactions -- `POST /api/v1/sign/{address}/typed-data` — sign EIP-712 typed data -- `GET /api/v1/keys` — list loaded wallet addresses - -From the host CLI, access via **temporary port-forward** to `remote-signer:9000` (same pattern as `openclaw cli`). - -### `internal/erc8004/signer.go` (new) - -```go -// RemoteSigner wraps the remote-signer REST API for ERC-8004 operations. -type RemoteSigner struct { - baseURL string // e.g. "http://localhost:19000" (port-forwarded) -} - -func NewRemoteSigner(baseURL string) *RemoteSigner - -// GetAddress returns the first loaded signing address. -func (s *RemoteSigner) GetAddress(ctx context.Context) (common.Address, error) - -// SignTransaction signs an EIP-1559 transaction for direct on-chain registration. -func (s *RemoteSigner) SignTransaction(ctx context.Context, addr common.Address, tx SignTxRequest) ([]byte, error) - -// SignTypedData signs EIP-712 typed data (for sponsored registration). 
-func (s *RemoteSigner) SignTypedData(ctx context.Context, addr common.Address, data EIP712TypedData) ([]byte, error) -``` - -### `internal/erc8004/register.go` (new) - -Two registration paths: - -**Direct on-chain** (base-sepolia, base): -1. Port-forward to remote-signer -2. `signer.GetAddress()` → wallet address -3. Build `register(agentURI)` calldata -4. Get nonce + gas estimates from eRPC -5. `signer.SignTransaction()` → signed tx -6. `eth_sendRawTransaction` via eRPC -7. Wait for receipt, parse `Registered` event - -**Sponsored** (ethereum mainnet): -1. Port-forward to remote-signer -2. `signer.GetAddress()` → wallet address -3. `signer.SignTypedData()` → EIP-712 authorization + registration intent signatures -4. POST to `net.SponsorURL` with signatures -5. Parse response `{success, agentId, txHash}` - -### Port-Forward Helper - -Reuse or adapt the pattern from `openclaw cli` (`cmd/obol/openclaw.go`). New helper: - -```go -// portForwardRemoteSigner starts a port-forward to the remote-signer in the -// given namespace and returns the local URL + cleanup function. -func portForwardRemoteSigner(cfg *config.Config, namespace string) (baseURL string, cleanup func(), err error) -``` - -### Files -- `internal/erc8004/signer.go` (new — remote-signer REST client) -- `internal/erc8004/signer_test.go` (new — HTTP mock tests) -- `internal/erc8004/register.go` (new — direct + sponsored registration flows) -- `internal/erc8004/sponsor.go` (new — sponsored API client, EIP-712 types) -- `internal/erc8004/sponsor_test.go` (new) - ---- - -## Phase 3: Wallet Auto-Discovery - -### `internal/openclaw/wallet_resolve.go` (new) - -```go -// ResolveWalletAddress returns the wallet address from the single OpenClaw instance. -// 0 instances → error, 1 → auto-select, 2+ → error suggesting --wallet. 
-func ResolveWalletAddress(cfg *config.Config) (string, error) - -// ResolveInstanceNamespace returns the namespace of the single OpenClaw instance -// (needed for port-forwarding to the remote-signer in that namespace). -func ResolveInstanceNamespace(cfg *config.Config) (string, error) -``` - -Flow: -1. `ListInstanceIDs(cfg)` → instance IDs -2. 0 → error, 1 → read wallet.json, 2+ → error with list of addresses -3. `ReadWalletMetadata(DeploymentPath(cfg, id))` → `WalletInfo.Address` - -**No private key extraction.** The address is all we need for auto-discovery. Signing goes through the remote-signer API. - -### Files -- `internal/openclaw/wallet_resolve.go` (new) -- `internal/openclaw/wallet_resolve_test.go` (new) - ---- - -## Phase 4: Rewrite `sell register` - -### `cmd/obol/sell.go` — `sellRegisterCommand` - -**New flags:** - -| Flag | Type | Default | Notes | -|------|------|---------|-------| -| `--chain` | string | `base-sepolia` | Comma-separated: `base-sepolia,base,mainnet`. Register on each, best-effort | -| `--sponsored` | bool | auto | `true` when network has sponsor URL | -| `--endpoint` | string | auto | Auto-detected from tunnel | -| `--name` | string | `Obol Agent` | Agent name for registration | -| `--description` | string | smart default | Auto-generated from stack info | -| `--image` | string | smart default | Default Obol logo URL | -| `--private-key-file` | string | | Fallback — used only if no remote-signer detected | - -**Removed:** `--private-key` (deprecated), `--rpc-url` (use local eRPC) - -**Action logic:** -1. Parse `--chain` → `erc8004.ResolveNetworks(chainCSV)` → `[]NetworkConfig` -2. Resolve wallet: try `openclaw.ResolveWalletAddress(cfg)`. If found, use remote-signer path. If not, require `--private-key-file`. -3. Resolve endpoint: `--endpoint` if set, else tunnel auto-detect -4. For each network (best-effort): - a. If sponsored + network has sponsor → sponsored path (sign EIP-712 via remote-signer, POST to sponsor) - b. 
Else → direct path (sign tx via remote-signer, broadcast via eRPC) - c. On success: print CAIP-10 registry line - d. On failure: print warning, continue to next chain -5. Update `agent-registration.json` with all successful registrations in the `registrations[]` array - -### Files -- `cmd/obol/sell.go` (rewrite `sellRegisterCommand`) -- `cmd/obol/sell_test.go` (update `TestSellRegister_Flags`) - ---- - -## Phase 5: Interactive Prompts with `charmbracelet/huh` - -### New dependency - -`go get github.com/charmbracelet/huh` - -### Signature change - -`sellCommand(cfg *config.Config)` → `sellCommand(cfg *config.Config, u *ui.UI)` (match `openclawCommand` pattern). Wire from `main.go`. - -### TTY guard - -```go -import "golang.org/x/term" -isInteractive := term.IsTerminal(int(os.Stdin.Fd())) -``` - -### `sell inference` interactive flow: - -| Field | Default | Prompt type | When prompted | -|-------|---------|-------------|---------------| -| Name | (required) | Text input | No positional arg | -| Model | (required) | Select from Ollama models | `--model` not set | -| Wallet | auto-discovered | Text (pre-filled) | Auto-discover fails | -| Chain | `base-sepolia` | Select | Using default | -| Price | `0.001` | Text (pre-filled) | Confirm or override | - -### `sell http` interactive flow: - -| Field | Default | Prompt type | When prompted | -|-------|---------|-------------|---------------| -| Name | (required) | Text input | No positional arg | -| Upstream | (required) | Text input | `--upstream` not set | -| Port | `8080` | Text (pre-filled) | Confirm | -| Wallet | auto-discovered | Text (pre-filled) | Auto-discover fails | -| Chain | `base-sepolia` | Select | `--chain` not set (remove `Required: true`) | -| Price model | `perRequest` | Select | No price flag set | -| Price value | `0.001` | Text | After model selected | -| Register? 
| `false` | Confirm | Not explicitly set | - -### `sell register` interactive flow: - -| Field | Default | Prompt type | When prompted | -|-------|---------|-------------|---------------| -| Chain(s) | `base-sepolia` | Multi-select | Using default | -| Name | `Obol Agent` | Text (pre-filled) | Confirm or override | -| Description | auto-generated | Text (pre-filled) | Confirm or override | -| Image | default logo URL | Text (pre-filled) | Confirm or override | -| Sponsored? | yes (when available) | Confirm | Network supports it | -| Endpoint | auto-detected | Text (pre-filled) | Tunnel fails | - -### Non-interactive path - -All prompts gated on `isInteractive`. When not TTY: flag validation applies, defaults used, no prompts. - -### Files -- `go.mod` / `go.sum` (add `charmbracelet/huh`) -- `cmd/obol/sell.go` (add prompts to inference, http, register) -- `cmd/obol/main.go` (wire `*ui.UI` to `sellCommand`) - ---- - -## Phase 6: x402 Payment Chain Updates - -### `cmd/obol/sell.go` — `resolveX402Chain` - -Add: -```go -case "ethereum", "ethereum-mainnet", "mainnet": - return x402.EthereumMainnet, nil -``` - -If `x402.EthereumMainnet` doesn't exist in the upstream `mark3labs/x402-go` library, define a local constant. 
- -### `cmd/obol/sell.go` — `sellPricingCommand` - -- Auto-discover wallet via `openclaw.ResolveWalletAddress(cfg)` when `--wallet` not set -- Remove `Required: true` from `--wallet` -- Update chain usage help: `"Payment chain (base-sepolia, base, ethereum)"` - -### Files -- `cmd/obol/sell.go` (`resolveX402Chain`, `sellPricingCommand`) -- `internal/x402/config.go` (`ResolveChain` — add ethereum) -- `internal/x402/config_test.go` (add ethereum test cases) -- `cmd/obol/sell_test.go` (update `TestResolveX402Chain`) - ---- - -## Phase 7: Tests & Docs - -### Tests -- `internal/erc8004/networks_test.go`: `ResolveNetwork` all chains, `ResolveNetworks` CSV parsing -- `internal/erc8004/signer_test.go`: HTTP mock for remote-signer API -- `internal/erc8004/sponsor_test.go`: EIP-712 construction, HTTP mock -- `internal/openclaw/wallet_resolve_test.go`: 0/1/multi instance -- `cmd/obol/sell_test.go`: Updated register flags, multi-chain parsing, new x402 chains - -### Docs -- `CLAUDE.md`: Update CLI command table, add `--chain` multi-value, remove `--rpc-url` -- `internal/embed/skills/sell/SKILL.md`: New registration flow, multi-network, remote-signer -- `internal/embed/skills/discovery/SKILL.md`: Multi-network registry info -- `cmd/obol/main.go`: Update root help text for sell register - ---- - -## Dependency Graph - -``` -Phase 1 (multi-network config) - ├──→ Phase 2 (remote-signer integration + registration flows) - └──→ Phase 3 (wallet auto-discovery) - │ - v - Phase 4 (rewrite sell register) ← depends on 1+2+3 - │ - v - Phase 5 (interactive prompts) ← depends on 3 (wallet discovery) - │ - v - Phase 6 (x402 payment chains + sell pricing) - │ - v - Phase 7 (tests & docs — throughout) -``` - ---- - -## Key Design Decisions - -1. **Remote-signer for all signing** — Never extract private keys. Use `POST /api/v1/sign/{address}/transaction` for direct registration, `POST /api/v1/sign/{address}/typed-data` for sponsored EIP-712. Access via temporary port-forward. - -2. 
**Local eRPC for all chain access** — `http://localhost/rpc/{network}` via k3d port mapping. No public RPCs. eRPC already has upstreams for mainnet, base, base-sepolia. - -3. **Multi-chain `--chain mainnet,base`** — Same agentURI and wallet registered on each chain. Best-effort: if one fails, continue to next. Update `registrations[]` array in `agent-registration.json` with all successes. - -4. **Prefer remote-signer, fallback to `--private-key-file`** — Auto-discover wallet → use remote-signer. If no instance found, accept `--private-key-file` for standalone usage. - -5. **Good defaults for registration metadata** — Pre-fill name (`Obol Agent`), description, image URL. Interactive mode lets users confirm or override each. - -6. **`charmbracelet/huh` for prompts** — Modern TUI with select, input, confirm. TTY-gated. - ---- - -## Key Files Summary - -| File | Change | -|------|--------| -| `internal/erc8004/networks.go` | New — multi-network config registry | -| `internal/erc8004/signer.go` | New — remote-signer REST API client | -| `internal/erc8004/register.go` | New — direct + sponsored registration flows | -| `internal/erc8004/sponsor.go` | New — sponsored API client | -| `internal/erc8004/client.go` | Add `NewClientForNetwork` | -| `internal/openclaw/wallet_resolve.go` | New — wallet address + namespace discovery | -| `cmd/obol/sell.go` | Rewrite register, add prompts to inference/http/register/pricing | -| `cmd/obol/main.go` | Wire `*ui.UI`, update help text | -| `cmd/obol/sell_test.go` | Update all affected tests | -| `internal/x402/config.go` | Add ethereum mainnet chain | - ---- - -## Verification - -```bash -# Phase 1 -go test ./internal/erc8004/ -run TestResolveNetwork - -# Phase 2 (unit — mock remote-signer) -go test ./internal/erc8004/ -run TestRemoteSigner -go test ./internal/erc8004/ -run TestSponsored - -# Phase 3 -go test ./internal/openclaw/ -run TestResolveWallet - -# Phase 4+5 (manual — needs running cluster + tunnel) -obol sell register --chain 
base-sepolia # direct tx via remote-signer -obol sell register --chain mainnet --sponsored # zero-gas via howto8004 -obol sell register --chain mainnet,base # multi-chain best-effort -obol sell inference # interactive prompts -obol sell http # interactive prompts -obol sell register # interactive with defaults to confirm - -# Phase 6 -obol sell pricing --chain base # auto-discovers wallet - -# All unit tests -go test ./cmd/obol/ -run TestSell -go test ./internal/erc8004/ -go test ./internal/openclaw/ -run TestResolve -go test ./internal/x402/ -run TestResolveChain -``` diff --git a/docs/plans/per-token-metering.md b/docs/plans/per-token-metering.md deleted file mode 100644 index a5839286..00000000 --- a/docs/plans/per-token-metering.md +++ /dev/null @@ -1,164 +0,0 @@ -# Per-Token Metering Plan - -## Scope - -This document defines phase 2 of issue 258: exact seller-side token metering -for paid inference offers, with Prometheus-native monitoring and a lightweight -status surface on `ServiceOffer`. - -Phase 1 is already deployed separately: - -- `perMTok` is accepted by the sell flows -- the enforced x402 charge is approximated as `perMTok / 1000` -- the source pricing metadata is persisted on each pricing route -- buyer and verifier expose operational Prometheus metrics - -This document covers how to replace that approximation for non-streaming -OpenAI-compatible chat completions. - -## Goals - -- Meter actual prompt, completion, and total token usage for paid inference - routes. -- Convert measured usage into estimated USDC using the seller's `perMTok`. -- Expose seller-side metrics through Prometheus. -- Surface roll-up usage on `ServiceOffer.status.usage`. -- Keep the verifier as the pre-request payment gate. - -## Non-Goals - -- Post-pay settlement or escrow. -- Exact metering for streaming responses. -- Exact metering for non-OpenAI response formats. -- Buyer-side billing authority. Buyer token telemetry remains observational. 
- -## Request Flow - -```text -client - -> Traefik HTTPRoute - -> x402-verifier (pre-request payment gate) - -> x402-meter - -> upstream inference service - -> x402-meter parses usage.total_tokens - -> response returned to client - -> x402-meter exports Prometheus metrics and updates ServiceOffer.status.usage -``` - -Key point: - -- `x402-verifier` still decides whether a request may proceed. -- `x402-meter` becomes the source of truth for exact usage accounting after the - upstream response is known. - -## Config Schema - -`x402-meter` is configured per monetized route. - -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: x402-meter-config - namespace: x402 -data: - config.yaml: | - routes: - - pattern: /services/my-qwen/v1/chat/completions - offerNamespace: llm - offerName: my-qwen - upstreamURL: http://ollama.llm.svc.cluster.local:11434 - upstreamAuth: "" - perMTok: "1.25" - priceModel: perMTok - responseFormat: openai-chat-completions -``` - -Required fields: - -- `pattern` -- `offerNamespace` -- `offerName` -- `upstreamURL` -- `perMTok` - -Optional fields: - -- `upstreamAuth` -- `responseFormat` - -## Status Schema - -`ServiceOffer.status.usage` is extended with a seller-side rollup: - -```yaml -status: - usage: - requests: 124 - promptTokens: 102400 - completionTokens: 18432 - totalTokens: 120832 - estimatedUSDC: "0.15104" - lastUpdated: "2026-03-06T12:34:56Z" -``` - -Rules: - -- `estimatedUSDC` is derived from `totalTokens / 1_000_000 * perMTok` -- values are monotonic rollups, not per-request histories -- writes should be batched to avoid excessive CR status churn - -## Prometheus Metrics - -`x402-meter` exposes `/metrics` and is scraped through a `ServiceMonitor`. 
- -Metric set: - -- `obol_x402_meter_requests_total{offer_namespace,offer_name,route}` -- `obol_x402_meter_prompt_tokens_total{offer_namespace,offer_name,route}` -- `obol_x402_meter_completion_tokens_total{offer_namespace,offer_name,route}` -- `obol_x402_meter_total_tokens_total{offer_namespace,offer_name,route}` -- `obol_x402_meter_estimated_usdc_total{offer_namespace,offer_name,route}` -- `obol_x402_meter_parse_failures_total{offer_namespace,offer_name,route}` - -Label guidance: - -- keep labels limited to offer identity and route pattern -- do not label by user, wallet, or request id - -## Buyer-Side Observational Metrics - -Buyer-side metrics remain separate from billing: - -- `x402-buyer` continues exposing request, payment, auth pool, and active model - mapping metrics. -- A later extension may parse `usage.total_tokens` from remote seller responses - and emit observational counters keyed by `upstream` and `remote_model`. -- Disagreement between buyer-observed tokens and seller-billed tokens should be - treated as an alerting or debugging signal, not a settlement input. - -## Rollout Plan - -1. Deploy `x402-meter` behind the verifier for one non-streaming paid route. -2. Validate token parsing and Prometheus scrape health. -3. Enable `ServiceOffer.status.usage` updates with rate limiting. -4. Switch sell-side status output from approximation-first to exact-usage-first - whenever meter data is present. -5. Keep the phase-1 `perMTok / 1000` approximation as a fallback for routes not - yet migrated to `x402-meter`. - -## Failure Handling - -- If the response body cannot be parsed, increment - `obol_x402_meter_parse_failures_total` and return the upstream response - unchanged. -- If the upstream omits `usage.total_tokens`, do not synthesize exact billing. -- If status updates fail, metrics must still be emitted. -- If Prometheus is unavailable, request serving must continue. 
- -## Open Questions - -- Whether streamed responses should be handled with token trailers, chunk - aggregation, or remain explicitly unsupported. -- Whether meter state should be derived solely from Prometheus counters or also - persisted locally for faster CR status reconciliation. diff --git a/docs/x402-test-plan.md b/docs/x402-test-plan.md deleted file mode 100644 index ed694923..00000000 --- a/docs/x402-test-plan.md +++ /dev/null @@ -1,330 +0,0 @@ -# x402 + ERC-8004 Integration Test Plan - -**Feature branch:** `feat/secure-enclave-inference` -**Scope:** 100% coverage of x402 payment gating, ERC-8004 on-chain registration, verifier service, and CLI commands. - ---- - -## 1. Coverage Inventory - -### Current State - -| Package | File | Existing Tests | Coverage | -|---------|------|---------------|----------| -| `internal/erc8004` | `client.go` | TestNewClient, TestRegister | ~60% (missing SetAgentURI, SetMetadata error paths) | -| `internal/erc8004` | `store.go` | TestStore | ~70% (missing Save errors, corrupt file) | -| `internal/erc8004` | `types.go` | none | 0% (JSON marshaling/unmarshaling) | -| `internal/erc8004` | `abi.go` | implicit via client tests | ~50% (missing ABI parse error, constant verification) | -| `internal/x402` | `verifier.go` | 11 tests | ~85% (missing SetRegistration, HandleWellKnown) | -| `internal/x402` | `matcher.go` | 8 tests | ~95% (good) | -| `internal/x402` | `config.go` | implicit via verifier | ~40% (missing LoadConfig, ResolveChain edge cases) | -| `internal/x402` | `watcher.go` | none | 0% | -| `internal/x402` | `setup.go` | none | 0% (kubectl-dependent, needs mock) | -| `cmd/obol` | `monetize.go` | none | 0% | - -### Target: 100% Function Coverage - ---- - -## 2. 
Unit Tests to Add - -### 2.1 `internal/erc8004` Package - -#### `abi_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestABI_ParsesSuccessfully` | Embedded ABI JSON parses without error | HIGH | -| `TestABI_AllFunctionsPresent` | All 10 functions present: register (3 overloads), setAgentURI, setMetadata, getMetadata, getAgentWallet, setAgentWallet, unsetAgentWallet, tokenURI | HIGH | -| `TestABI_AllEventsPresent` | All 3 events present: Registered, URIUpdated, MetadataSet | HIGH | -| `TestABI_RegisterOverloads` | 3 distinct register methods exist with correct input counts (0, 1, 2) | HIGH | -| `TestConstants_Addresses` | IdentityRegistryBaseSepolia, ReputationRegistryBaseSepolia, ValidationRegistryBaseSepolia are valid hex addresses (40 chars after 0x) | MEDIUM | -| `TestConstants_ChainID` | BaseSepoliaChainID == 84532 | LOW | - -#### `types_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestAgentRegistration_MarshalJSON` | Full struct serializes to spec-compliant JSON (type, name, description, image, services, x402Support, active, registrations, supportedTrust) | HIGH | -| `TestAgentRegistration_UnmarshalJSON` | Canonical spec JSON (from ERC8004SPEC.md) deserializes correctly | HIGH | -| `TestAgentRegistration_OmitEmptyFields` | Optional fields (description, image, registrations, supportedTrust) omitted when zero-value | MEDIUM | -| `TestServiceDef_VersionOptional` | ServiceDef without version marshals correctly (version omitempty) | MEDIUM | -| `TestOnChainReg_AgentIDNumeric` | AgentID is int64, serializes as JSON number (not string) | HIGH | -| `TestRegistrationType_Constant` | RegistrationType == `"https://eips.ethereum.org/EIPS/eip-8004#registration-v1"` | LOW | - -#### `client_test.go` (ADDITIONS to existing) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestNewClient_DialError` | Returns error when RPC URL is 
unreachable | MEDIUM | -| `TestNewClient_ChainIDError` | Returns error when eth_chainId fails | MEDIUM | -| `TestSetAgentURI` | Successful tx + wait mined (mock sendRawTransaction + receipt) | HIGH | -| `TestSetMetadata` | Successful tx + wait mined | HIGH | -| `TestRegister_NoRegisteredEvent` | Returns error when receipt has no Registered event log | HIGH | -| `TestRegister_TxError` | Returns error when sendRawTransaction fails | MEDIUM | -| `TestGetMetadata_EmptyResult` | Returns nil when contract returns empty bytes | MEDIUM | - -#### `store_test.go` (ADDITIONS to existing) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestStore_SaveOverwrite` | Second Save overwrites first | MEDIUM | -| `TestStore_LoadCorruptJSON` | Returns error on malformed JSON file | MEDIUM | -| `TestStore_SaveReadOnly` | Returns error when directory is read-only (permission denied) | LOW | - -### 2.2 `internal/x402` Package - -#### `verifier_test.go` (ADDITIONS) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestVerifier_SetRegistration` | SetRegistration stores data, HandleWellKnown returns it | HIGH | -| `TestVerifier_HandleWellKnown_NoRegistration` | Returns 404 when no registration set | HIGH | -| `TestVerifier_HandleWellKnown_JSON` | Response is valid JSON AgentRegistration with correct Content-Type | HIGH | -| `TestVerifier_ReadyzNotReady` | Returns 503 when config is nil (fresh Verifier without config) | MEDIUM | - -#### `config_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestLoadConfig_ValidYAML` | Parses complete YAML with wallet, chain, routes | HIGH | -| `TestLoadConfig_Defaults` | Empty chain defaults to "base-sepolia", empty facilitatorURL defaults | HIGH | -| `TestLoadConfig_InvalidYAML` | Returns parse error on malformed YAML | MEDIUM | -| `TestLoadConfig_FileNotFound` | Returns read error | MEDIUM | -| `TestResolveChain_AllSupported` | All 6 
chain names resolve (base, base-sepolia, polygon, polygon-amoy, avalanche, avalanche-fuji) | HIGH | -| `TestResolveChain_Aliases` | "base-mainnet" == "base", "polygon-mainnet" == "polygon", etc. | MEDIUM | -| `TestResolveChain_Unsupported` | Returns error for unknown chain name | MEDIUM | -| `TestResolveChain_ErrorMessage` | Error message lists all supported chains | LOW | - -#### `watcher_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestWatchConfig_DetectsChange` | Write new config file, watcher reloads verifier within interval | HIGH | -| `TestWatchConfig_IgnoresUnchanged` | Same mtime = no reload | MEDIUM | -| `TestWatchConfig_InvalidConfig` | Bad YAML doesn't crash watcher, verifier keeps old config | HIGH | -| `TestWatchConfig_CancelContext` | Context cancellation stops the watcher goroutine cleanly | MEDIUM | -| `TestWatchConfig_MissingFile` | Missing file logged but watcher continues | MEDIUM | - -#### `setup_test.go` (NEW — requires abstraction for kubectl) - -The `setup.go` file shells out to `kubectl`. To unit-test it, extract an interface: - -```go -// KubectlRunner abstracts kubectl execution for testing. 
-type KubectlRunner interface { - Run(args ...string) error - Output(args ...string) (string, error) -} -``` - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestSetup_PatchesSecretAndConfigMap` | Calls kubectl patch on both secret and configmap with correct args | HIGH | -| `TestSetup_NoKubeconfig` | Returns "cluster not running" error | HIGH | -| `TestAddRoute_AppendsToExisting` | Reads existing config, appends route, patches back | HIGH | -| `TestAddRoute_FirstRoute` | Adds route when routes list is empty | MEDIUM | -| `TestGetPricingConfig_EmptyResponse` | Returns empty PricingConfig when configmap has no data | MEDIUM | -| `TestGetPricingConfig_ParsesYAML` | Correct wallet/chain/routes from kubectl output | HIGH | -| `TestPatchPricingConfig_Serialization` | Generated YAML has correct format (routes array, descriptions) | MEDIUM | - ---- - -## 3. Integration Tests (//go:build integration) - -These require a running k3d cluster with `OBOL_DEVELOPMENT=true`. - -### 3.1 `internal/x402/integration_test.go` (NEW) - -**Prerequisites:** Running cluster, x402 namespace deployed. - -| Test | What it verifies | Runtime | Priority | -|------|-----------------|---------|----------| -| `TestIntegration_X402Setup` | `obol x402 setup --wallet 0x... 
--chain base-sepolia` patches configmap + secret in cluster | 30s | HIGH | -| `TestIntegration_X402Status` | `obol x402 status` reads correct config from cluster | 15s | HIGH | -| `TestIntegration_X402AddRoute` | `obol x402 setup` then AddRoute() adds route, verifiable via GetPricingConfig | 30s | MEDIUM | -| `TestIntegration_VerifierDeployment` | x402-verifier pod is running, responds to /healthz | 15s | HIGH | -| `TestIntegration_VerifierForwardAuth` | Send request to /verify endpoint with X-Forwarded-Uri, verify 200/402 behavior | 30s | HIGH | -| `TestIntegration_WellKnownEndpoint` | GET /.well-known/agent-registration.json returns valid JSON (after registration set) | 15s | MEDIUM | - -### 3.2 `internal/erc8004/integration_test.go` (NEW) - -**Prerequisites:** Base Sepolia RPC access, funded test wallet (ERC8004_PRIVATE_KEY env var). - -| Test | What it verifies | Runtime | Priority | -|------|-----------------|---------|----------| -| `TestIntegration_RegisterOnBaseSepolia` | Full register() tx on testnet, verify agentID returned | 60s | HIGH | -| `TestIntegration_SetAgentURI` | setAgentURI() after register, verify tokenURI() returns new URI | 60s | HIGH | -| `TestIntegration_SetAndGetMetadata` | setMetadata() + getMetadata() roundtrip | 60s | MEDIUM | -| `TestIntegration_GetAgentWallet` | getAgentWallet() returns owner address after registration | 30s | MEDIUM | - -**Skip logic:** -```go -func TestMain(m *testing.M) { - if os.Getenv("ERC8004_PRIVATE_KEY") == "" { - fmt.Println("Skipping ERC-8004 integration tests: ERC8004_PRIVATE_KEY not set") - os.Exit(0) - } - os.Exit(m.Run()) -} -``` - -### 3.3 End-to-End: x402 Payment Flow - -**File:** `internal/x402/e2e_test.go` (NEW, `//go:build integration`) - -**Prerequisites:** Running cluster with inference network deployed, x402 enabled, funded test wallet. - -| Test | Scenario | Steps | Priority | -|------|----------|-------|----------| -| `TestE2E_InferenceWithPayment` | Full x402 payment lifecycle | 1. 
Deploy inference network with x402Enabled=true; 2. Configure pricing via AddRoute; 3. Send request WITHOUT payment → 402; 4. Verify 402 body contains payment requirements; 5. Send request WITH valid x402 payment header → 200 | HIGH | -| `TestE2E_RegisterAndServeWellKnown` | ERC-8004 + well-known endpoint | 1. Register agent on Base Sepolia; 2. Set registration on verifier; 3. GET /.well-known/agent-registration.json → matches registration | MEDIUM | - ---- - -## 4. CLI Command Tests - -### `cmd/obol/x402_test.go` (NEW) - -Pattern: Build the CLI app, run subcommands against mocked infrastructure. - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestX402Command_Structure` | x402 has 3 subcommands: register, setup, status | HIGH | -| `TestX402Register_RequiresPrivateKey` | Fails without --private-key or ERC8004_PRIVATE_KEY | HIGH | -| `TestX402Register_TrimsHexPrefix` | 0x-prefixed key handled correctly | MEDIUM | -| `TestX402Setup_RequiresWallet` | Fails without --wallet flag | HIGH | -| `TestX402Setup_DefaultChain` | Default chain is "base-sepolia" | MEDIUM | -| `TestX402Status_NoCluster` | Graceful output when no cluster running | MEDIUM | -| `TestX402Status_NoRegistration` | Shows "not registered" message | MEDIUM | - ---- - -## 5. 
Helmfile Template Tests - -### Infrastructure Helmfile (conditional x402 resources) - -**File:** `internal/embed/infrastructure/helmfile_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestHelmfile_X402DisabledByDefault` | x402Enabled=false: no Middleware CRD rendered, no ExtensionRef on eRPC HTTPRoute | HIGH | -| `TestHelmfile_X402Enabled` | x402Enabled=true: Middleware CRD rendered with correct ForwardAuth address, ExtensionRef added to eRPC HTTPRoute | HIGH | - -### Inference Network Template - -**File:** `internal/embed/networks/inference/template_test.go` (NEW) - -| Test | What it verifies | Priority | -|------|-----------------|----------| -| `TestInferenceValues_X402EnabledField` | values.yaml.gotmpl contains x402Enabled field with @enum true,false, @default false | HIGH | -| `TestInferenceHelmfile_X402Passthrough` | x402Enabled value passed through to helmfile.yaml.gotmpl | HIGH | -| `TestInferenceGateway_ConditionalMiddleware` | gateway.yaml: Middleware CRD only rendered when x402Enabled=true | HIGH | -| `TestInferenceGateway_ConditionalExtensionRef` | gateway.yaml: ExtensionRef only present when x402Enabled=true | HIGH | - ---- - -## 6. 
Coverage Gap Analysis — Functions NOT Tested - -### internal/erc8004 - -| Function | File:Line | Test Status | Action | -|----------|-----------|-------------|--------| -| `NewClient()` | client.go:26 | TESTED | - | -| `Close()` | client.go:57 | implicit | - | -| `Register()` | client.go:63 | TESTED | Add error paths | -| `SetAgentURI()` | client.go:95 | **UNTESTED** | Add test | -| `SetMetadata()` | client.go:114 | **UNTESTED** | Add test | -| `GetMetadata()` | client.go:133 | TESTED | Add empty result | -| `TokenURI()` | client.go:150 | TESTED | - | -| `NewStore()` | store.go:30 | implicit | - | -| `Save()` | store.go:39 | TESTED | Add error paths | -| `Load()` | store.go:55 | TESTED | Add corrupt file | - -### internal/x402 - -| Function | File:Line | Test Status | Action | -|----------|-----------|-------------|--------| -| `NewVerifier()` | verifier.go:25 | TESTED | - | -| `Reload()` | verifier.go:34 | TESTED | - | -| `HandleVerify()` | verifier.go:56 | TESTED (11 cases) | - | -| `HandleHealthz()` | verifier.go:114 | TESTED | - | -| `HandleReadyz()` | verifier.go:120 | TESTED | Add nil config case | -| `SetRegistration()` | verifier.go:131 | **UNTESTED** | Add test | -| `HandleWellKnown()` | verifier.go:136 | **UNTESTED** | Add test | -| `LoadConfig()` | config.go:46 | **UNTESTED** | Add tests | -| `ResolveChain()` | config.go:69 | partial (error case only) | Add all chains | -| `WatchConfig()` | watcher.go:16 | **UNTESTED** | Add tests | -| `Setup()` | setup.go:23 | **UNTESTED** | Needs kubectl abstraction | -| `AddRoute()` | setup.go:70 | **UNTESTED** | Needs kubectl abstraction | -| `GetPricingConfig()` | setup.go:96 | **UNTESTED** | Needs kubectl abstraction | -| `matchRoute()` | matcher.go:19 | TESTED (8 cases) | - | -| `matchPattern()` | matcher.go:29 | TESTED | - | -| `globMatch()` | matcher.go:52 | TESTED | - | - ---- - -## 7. Implementation Priority - -### Phase 1: Unit tests (no cluster needed) — ~2 hours - -1. 
`internal/erc8004/abi_test.go` — ABI integrity checks -2. `internal/erc8004/types_test.go` — JSON serialization spec compliance -3. `internal/x402/config_test.go` — LoadConfig + ResolveChain -4. `internal/x402/verifier_test.go` — SetRegistration + HandleWellKnown additions -5. `internal/x402/watcher_test.go` — File watcher - -### Phase 2: Missing client methods + error paths — ~1 hour - -6. `internal/erc8004/client_test.go` — SetAgentURI, SetMetadata, error paths -7. `internal/erc8004/store_test.go` — overwrite, corrupt, permissions - -### Phase 3: Setup abstraction + tests — ~1.5 hours - -8. Extract `KubectlRunner` interface from `setup.go` -9. `internal/x402/setup_test.go` — all Setup/AddRoute/GetPricingConfig - -### Phase 4: Integration tests — ~2 hours (requires running cluster) - -10. `internal/x402/integration_test.go` — cluster-based tests -11. `internal/erc8004/integration_test.go` — Base Sepolia testnet tests - -### Phase 5: Template + CLI tests — ~1 hour - -12. Helmfile template rendering tests -13. `cmd/obol/x402_test.go` — CLI command structure + validation - ---- - -## 8. Test Execution Commands - -```bash -# Phase 1-3: Unit tests only -go test -v ./internal/erc8004/... ./internal/x402/... - -# Phase 4: Integration tests (requires cluster + testnet key) -export OBOL_CONFIG_DIR=$(pwd)/.workspace/config -export OBOL_BIN_DIR=$(pwd)/.workspace/bin -export OBOL_DATA_DIR=$(pwd)/.workspace/data -export ERC8004_PRIVATE_KEY= -go build -o .workspace/bin/obol ./cmd/obol -go test -tags integration -v -timeout 15m ./internal/x402/ ./internal/erc8004/ - -# Coverage report -go test -coverprofile=coverage.out ./internal/erc8004/... ./internal/x402/... -go tool cover -html=coverage.out -o coverage.html -``` - ---- - -## 9. 
Success Criteria - -- [ ] 100% function coverage on `internal/erc8004/` (all 10 exported functions) -- [ ] 100% function coverage on `internal/x402/` (all 14 exported functions) -- [ ] All 3 ABI register overloads verified against canonical ABI -- [ ] JSON serialization roundtrip matches ERC-8004 spec format -- [ ] WatchConfig tested with file changes, cancellation, and error recovery -- [ ] Setup/AddRoute/GetPricingConfig tested via kubectl mock -- [ ] HandleWellKnown tested (200 with data, 404 without) -- [ ] Integration tests skip gracefully when prerequisites unavailable -- [ ] `go test ./...` passes with zero failures diff --git a/features/application_management.feature b/features/application_management.feature new file mode 100644 index 00000000..5af48fb3 --- /dev/null +++ b/features/application_management.feature @@ -0,0 +1,36 @@ +@bdd +Feature: Application management + As a local operator + I want named managed applications that can be installed, synced, listed, and deleted + So that supporting workloads follow the same lifecycle discipline as the rest of the stack + + # References: SPEC Section 3.8 (Application Management and Supporting Operations), B&E Section 2.8 (Managed Applications and Supporting Operations) + + Background: + Given the operator has a running stack and access to supported application sources + + @phase1 @fast + Scenario: Installing an application creates a named managed deployment + Given the operator selects a supported application source + When the operator installs an application with a name + Then the platform records that name as the persistent application identity + And later sync and delete operations target that same managed deployment + + @phase1 @fast + Scenario: Deleting an application removes only the selected deployment + Given multiple managed applications exist + When the operator deletes one named application + Then only that application's deployment artifacts are removed + And unrelated applications remain intact + + 
@phase1 + Scenario Outline: Sync applies the current desired source state to a named application + Given the operator has an installed application from <source_kind> + When the operator runs app sync for that application + Then the deployment is reconciled against <source_kind> + + Examples: + | source_kind | + | helm chart | + | OCI chart | + | local path | diff --git a/features/buy_side_payments.feature b/features/buy_side_payments.feature new file mode 100644 index 00000000..6b84fef4 --- /dev/null +++ b/features/buy_side_payments.feature @@ -0,0 +1,35 @@ +@bdd +Feature: Buy-side remote inference + As a remote buyer + I want paid remote models to resolve through a bounded-risk payment sidecar + So that I can purchase inference without receiving direct access to signing authority + + # References: SPEC Section 3.6 (Buy-Side Remote Inference), B&E Section 2.6 (Buy-Side Payments) + + Background: + Given the cluster-wide LiteLLM gateway exposes a static paid model namespace + + @phase1 @fast + Scenario: Paid model routing uses the static paid namespace + Given a remote model has been configured for paid access + When a buyer requests that model through LiteLLM + Then the request resolves through the static paid namespace + And payment handling is delegated to the buyer sidecar + + @phase1 @fast + Scenario: Spending is bounded by the pre-signed auth pool + Given the buyer sidecar has a finite pool of pre-signed authorizations + When the sidecar forwards paid requests + Then it uses only the available authorizations in that pool + And it fails explicitly instead of escalating to live signing authority + + @phase1 + Scenario Outline: Unmapped paid models fail explicitly + Given the buyer requests <model_name> + When no remote payment mapping exists for that model + Then the request fails with an explicit unmapped-model error + + Examples: + | model_name | + | paid/unknown-model | + | paid/missing-offer | diff --git a/features/frontend_and_monitoring.feature b/features/frontend_and_monitoring.feature new file
mode 100644 index 00000000..945c12f5 --- /dev/null +++ b/features/frontend_and_monitoring.feature @@ -0,0 +1,36 @@ +@bdd +Feature: Frontend and monitoring surfaces + As a local operator + I want observability and browser surfaces that match the platform's local-first posture + So that I can inspect the stack without accidentally publishing operator-only interfaces + + # References: SPEC Section 3.7 (Tunnel, Discovery, Frontend, and Monitoring), B&E Section 2.7 (Tunnel, Discovery, Frontend, and Monitoring) + + Background: + Given the stack has deployed its default frontend and monitoring components + + @phase1 @fast + Scenario: Frontend stays on the local hostname by default + Given the operator opens the stack frontend + When no explicit architecture change has been made for public exposure + Then the frontend is served through the local hostname contract + And the public tunnel does not expose that interface + + @phase1 @fast + Scenario: Monitoring remains an operator-only surface + Given Prometheus-backed monitoring is installed + When buyers access public monetized services + Then monitoring data remains separate from buyer-facing endpoints + And operator diagnostics stay inside the local control plane + + @phase1 + Scenario Outline: Status surfaces expose operational data through the intended channel + Given the operator inspects <surface> + When the platform reports health or runtime state + Then the operator receives <operational_view> + + Examples: + | surface | operational_view | + | sell status | pricing and reconciliation | + | model status | provider and route readiness | + | tunnel | current tunnel activation | diff --git a/features/llm_routing.feature b/features/llm_routing.feature new file mode 100644 index 00000000..46fe1550 --- /dev/null +++ b/features/llm_routing.feature @@ -0,0 +1,37 @@ +@bdd +Feature: LLM routing and provider management + As a local operator + I want one model gateway for local, cloud, and paid inference routes + So that OpenClaw instances and buyers see a
consistent model contract + + # References: SPEC Section 3.2 (LLM Routing and Provider Management), B&E Section 2.2 (LLM Routing) + + Background: + Given the stack has a cluster-wide LiteLLM deployment + + @phase1 @fast + Scenario: LiteLLM is the central operator-facing gateway + Given an OpenClaw instance needs model access + When the instance sends inference traffic through the platform + Then the request is routed through LiteLLM + And provider-specific credentials remain centralized at the cluster gateway + + @phase1 @fast + Scenario: Invalid custom endpoints are rejected before publication + Given the operator supplies a custom OpenAI-compatible endpoint + When the operator runs model setup for that endpoint + Then the endpoint is validated before it is added to the route set + And broken provider entries are not published to downstream consumers + + @phase1 + Scenario Outline: Model namespaces resolve to the correct upstream class + Given LiteLLM is configured for <namespace_type> + When a request targets the <model_name> model namespace + Then the platform routes the request to <upstream_class> + + Examples: + | namespace_type | model_name | upstream_class | + | local Ollama | llama3.2:3b | the local model runtime | + | cloud Anthropic | claude-sonnet-4-5-20250929 | the Anthropic API | + | cloud OpenAI | gpt-4o | the OpenAI API | + | buy-side paid route | paid/qwen3.5:9b | the x402 buyer sidecar | diff --git a/features/network_management.feature b/features/network_management.feature new file mode 100644 index 00000000..19c1f058 --- /dev/null +++ b/features/network_management.feature @@ -0,0 +1,35 @@ +@bdd +Feature: Network management and eRPC + As a local operator + I want local chain deployments and remote RPC aliases to remain distinct + So that network support claims and routing behavior stay accurate + + # References: SPEC Section 3.3 (Network Management and eRPC), B&E Section 2.3 (Network Management) + + Background: + Given the operator has a running stack with eRPC available + + @phase1 @fast + 
Scenario: Installable networks come only from embedded bundles + Given the operator wants to deploy a local network + When the operator lists installable networks + Then only embedded deployable network bundles are shown + And remote RPC aliases are not presented as local deployments + + @phase1 @fast + Scenario: Remote RPC aliases default to read-only forwarding + Given the operator adds a remote chain without allow-writes + When requests are routed through eRPC for that chain + Then write methods remain blocked by default + And read-only RPC methods continue to work + + @phase1 + Scenario Outline: Network status matches current command semantics + Given the operator has <deployment_state> + When the operator runs network status + Then the command reports <status_surface> + + Examples: + | deployment_state | status_surface | + | local and remote networks configured | global eRPC health and upstreams | + | no named local deployment selected | the current gateway summary contract | diff --git a/features/openclaw_runtime.feature b/features/openclaw_runtime.feature new file mode 100644 index 00000000..b527d115 --- /dev/null +++ b/features/openclaw_runtime.feature @@ -0,0 +1,36 @@ +@bdd +Feature: OpenClaw runtime and agent capabilities + As an agent developer + I want a canonical elevated OpenClaw runtime plus separately managed instances + So that automation and custom agents share the same safe deployment model + + # References: SPEC Section 3.4 (OpenClaw Runtime and Agent Capabilities), B&E Section 2.4 (OpenClaw Runtime) + + Background: + Given the stack has completed baseline startup + + @phase1 @fast + Scenario: The default elevated runtime is prepared automatically + Given the operator has not created any extra OpenClaw instances + When the stack deploys its defaults + Then the canonical elevated OpenClaw runtime is prepared for obol-agent workflows + And the runtime receives the elevated capabilities required by shipped skills + + @phase1 @fast + Scenario: Additional instances remain
operator-managed deployments + Given the operator has created one or more named OpenClaw instances + When the operator syncs or deletes an instance + Then the action targets the named deployment the operator selected + And other instances remain unchanged + + @phase1 + Scenario Outline: Operator surfaces resolve to the correct OpenClaw instance + Given the operator targets the <instance_id> instance + When the operator uses the <surface> command + Then the command returns data for <instance_id> + + Examples: + | instance_id | surface | + | obol-agent | token | + | obol-agent | dashboard | + | my-agent | token | diff --git a/features/sell_side_monetization.feature b/features/sell_side_monetization.feature new file mode 100644 index 00000000..dff445b2 --- /dev/null +++ b/features/sell_side_monetization.feature @@ -0,0 +1,44 @@ +@bdd +Feature: Sell-side monetization + As a local operator + I want to expose priced services through a ServiceOffer control loop + So that public buyers can discover and pay for bounded compute or HTTP endpoints + + # References: SPEC Section 3.5 (Sell-Side Monetization), B&E Section 2.5 (Sell-Side Monetization) + + Background: + Given the operator has a running stack with the elevated agent runtime available + + @phase1 @fast + Scenario: A ServiceOffer is created in the namespace the operator chose + Given the operator creates a sell-side offer with an explicit namespace + When the CLI submits the ServiceOffer resource + Then the resource is written into that namespace + And downstream pricing and routing assets are derived from that resource + + @phase1 @fast + Scenario: Probe verifies the payment gate without spending buyer funds + Given a sell-side offer has published its payment route + When the operator runs sell probe against that offer + Then the command confirms the payment gate is reachable + And no paid inference budget is consumed + + @phase1 + Scenario Outline: Pricing models remain explicit about their current billing contract + Given a sell-side offer uses the <pricing_model>
pricing model + When the offer is reconciled successfully + Then the route publishes payment terms for <pricing_model> + And operators can inspect the current pricing contract through status surfaces + + Examples: + | pricing_model | + | perRequest | + | perMTok | + | perHour | + + @phase2 + Scenario: Exact token metering supplements the pre-request payment gate + Given an inference offer uses per-token pricing + When phase 2 exact metering is enabled for that route + Then pre-request authorization still happens before execution + And post-response usage updates the seller-side accounting surfaces diff --git a/features/stack_lifecycle.feature b/features/stack_lifecycle.feature new file mode 100644 index 00000000..08d7ba29 --- /dev/null +++ b/features/stack_lifecycle.feature @@ -0,0 +1,36 @@ +@bdd +Feature: Stack lifecycle + As a local operator + I want to initialize, start, stop, and purge the stack safely + So that I can control the local platform without losing important state unexpectedly + + # References: SPEC Section 3.1 (Stack Lifecycle), B&E Section 2.1 (Stack Lifecycle) + + Background: + Given the operator is using the obol CLI against a local workspace + + @phase1 @fast + Scenario: Initialize and start a new stack + Given no stack config exists yet + When the operator runs stack init and then stack up + Then the CLI persists a stable stack identity and backend choice + And baseline infrastructure is deployed before any optional public exposure + + @phase1 @fast + Scenario: Purge without force preserves persistent data + Given a stack has existing config and persistent data + When the operator runs stack purge without force + Then the cluster state and config are removed + And persistent data remains available for later recovery + + @phase1 + Scenario Outline: Startup tolerates missing optional provider dependencies + Given the host <provider_state> + When the operator runs stack up + Then the stack reaches a usable baseline + And provider setup can be completed <recovery_path> + + Examples: + | 
provider_state | recovery_path | + | has discoverable local models | automatically during startup | + | lacks local models or cloud credentials | later through model setup | diff --git a/features/tunnel_and_discovery.feature b/features/tunnel_and_discovery.feature new file mode 100644 index 00000000..050296ca --- /dev/null +++ b/features/tunnel_and_discovery.feature @@ -0,0 +1,36 @@ +@bdd +Feature: Tunnel, discovery, and public exposure + As a local operator + I want public routes to be optional and narrowly scoped + So that local control surfaces remain private while discoverable services can still be published + + # References: SPEC Section 3.7 (Tunnel, Discovery, Frontend, and Monitoring), B&E Section 2.7 (Tunnel, Discovery, Frontend, and Monitoring) + + Background: + Given the stack can run with or without a public tunnel + + @phase1 @fast + Scenario: Quick tunnels are activated on demand + Given the operator has not provisioned a persistent DNS tunnel + When the stack starts + Then the quick tunnel remains dormant until a public route needs it + And local-only operation remains available immediately + + @phase1 @fast + Scenario: Discovery metadata follows the active tunnel URL + Given a public service has discovery metadata + When the active tunnel URL changes + Then discovery metadata is refreshed to reflect the current public address + And stale public URLs are not treated as canonical + + @phase1 + Scenario Outline: Operator surfaces remain local-only unless the architecture changes deliberately + Given the operator inspects the <surface> + When the platform computes public exposure rules + Then <surface> remains local-only + + Examples: + | surface | + | frontend | + | eRPC | + | monitoring | diff --git a/plans/agent-services.md b/plans/agent-services.md deleted file mode 100644 index a05869ff..00000000 --- a/plans/agent-services.md +++ /dev/null @@ -1,567 +0,0 @@ -# Agent Services: Autonomous x402-Gated HTTP Endpoints - -**Goal:** A skill that lets OpenClaw deploy its own
HTTP services into the cluster, gate them with x402 payments, register them with ERC-8004, expose them to the public internet, and monitor earnings — turning the agent from a tool-user into an autonomous economic actor. - ---- - -## Why This Is The One - -The Obol Stack already has every piece: - -| Capability | How it exists today | -|------------|-------------------| -| Wallet | Web3Signer in-cluster, `signer.py` for signing | -| Onchain identity | `agent-identity` skill, ERC-8004 registration | -| Kubernetes cluster | k3d with Traefik gateway | -| Public internet access | Cloudflare tunnel (`obol tunnel`) | -| x402 payment infrastructure | `inference-gateway` binary, Go x402 SDK, Coinbase facilitator | -| Blockchain nodes | eRPC gateway routing to local/remote nodes | - -What's missing: **the agent can't deploy a service, price it, and collect payment.** This skill closes that gap. - ---- - -## Existing Precedent: The Inference Gateway - -The `inference` network (`internal/embed/networks/inference/`) already implements this exact pattern: - -1. User specifies a model, price, wallet, and chain -2. Helmfile deploys: Ollama pod + x402 gateway pod + Service + HTTPRoute + metadata ConfigMap -3. Gateway wraps Ollama's OpenAI-compatible API with x402 payment verification -4. Traefik routes `/inference-/v1/*` to the gateway -5. Cloudflare tunnel makes it publicly accessible -6. Frontend discovers it via the metadata ConfigMap - -**The `agent-services` skill generalises this pattern** from "inference only" to "any HTTP handler the agent writes." - ---- - -## Architecture - -``` -OpenClaw pod (writes handler + config) - │ - │ 1. Agent writes handler.py (business logic) - │ 2. identity.sh registers with ERC-8004 - │ 3. 
service.sh deploys via helmfile - │ - ▼ -agent-service- namespace - ┌─────────────────────────────┐ - │ Pod: agent-svc- │ - │ ┌────────────────────────┐ │ - │ │ x402-proxy (sidecar) │ │ ← Verifies payment, settles via facilitator - │ │ port 8402 │ │ - │ └──────────┬─────────────┘ │ - │ │ proxy_pass │ - │ ┌──────────▼─────────────┐ │ - │ │ handler.py (main) │ │ ← Agent's business logic (plain HTTP) - │ │ port 8080 │ │ - │ └────────────────────────┘ │ - │ │ - │ ConfigMap: handler-code │ ← Agent's Python handler - │ ConfigMap: svc-metadata │ ← Pricing, endpoints, description - │ Service: agent-svc- │ ← ClusterIP, port 8402 - │ HTTPRoute: agent-svc-│ ← /services//* → port 8402 - └─────────────────────────────┘ - │ - ▼ - Traefik Gateway (traefik namespace) - │ - ▼ - Cloudflare Tunnel → https:///services//* -``` - -### Why a Sidecar Proxy? - -The agent writes **plain HTTP handlers** — no x402 awareness needed. A sidecar `x402-proxy` container handles all payment logic: - -1. Receives inbound request -2. If no payment header → responds `402 Payment Required` with pricing -3. If payment header present → verifies signature via facilitator -4. If valid → proxies request to handler on `localhost:8080` -5. Settles payment onchain via facilitator -6. Returns handler response with `PAYMENT-RESPONSE` header - -**Benefits:** -- Agent doesn't need to understand x402 protocol internals -- Same proxy image reused across all services (already exists as `inference-gateway`) -- Handler can be any language/framework — just serve HTTP on port 8080 -- Payment config is environment variables, not code - -### The x402 Proxy Image - -The existing `inference-gateway` (`cmd/inference-gateway/main.go`) is already a generic x402 reverse proxy. It takes `--upstream`, `--wallet`, `--price`, `--chain`, `--facilitator` flags and wraps any upstream HTTP service with x402 payment gates. 
- -**Reuse strategy:** The inference gateway image (`ghcr.io/obolnetwork/inference-gateway`) can proxy any upstream, not just Ollama. For `agent-services`, the upstream is `http://localhost:8080` (the agent's handler running in the same pod). - -If needed, we can extract the generic proxy into its own image (`ghcr.io/obolnetwork/x402-proxy`) later. For now, the inference gateway binary works as-is. - ---- - -## Skill Structure - -``` -agent-services/ -├── SKILL.md -├── scripts/ -│ └── service.sh # Deploy, list, update, teardown, monitor -├── templates/ -│ ├── helmfile.yaml.gotmpl # Helmfile template for service deployment -│ ├── handler.py.tmpl # Minimal Python handler scaffold -│ └── metadata.json.tmpl # Service metadata template -└── references/ - └── x402-server-patterns.md # Pricing strategies, facilitator config, chain selection -``` - -### `service.sh` Commands - -```bash -# === Lifecycle === - -# Deploy a new service from a handler file -sh scripts/service.sh deploy \ - --name weather-api \ - --handler ./my_handler.py \ - --price 0.10 \ - --chain base \ - --wallet 0xYourAddress \ - --description "Real-time weather data" \ - --register # auto-register endpoint with ERC-8004 - -# Deploy with the scaffold template (agent fills in the handler later) -sh scripts/service.sh scaffold --name weather-api -# → Creates handler.py from template, agent edits it, then deploys - -# Update handler code (patches ConfigMap, restarts pod) -sh scripts/service.sh update --name weather-api --handler ./updated_handler.py - -# Update pricing (patches gateway config, no restart needed) -sh scripts/service.sh set-price --name weather-api --price 0.05 - -# Tear down a service (deletes namespace + all resources) -sh scripts/service.sh teardown --name weather-api - -# === Discovery === - -# List deployed services with status and URLs -sh scripts/service.sh list - -# Show service details (pricing, endpoints, health, earnings) -sh scripts/service.sh status --name weather-api - -# === 
Monitoring === - -# Check USDC earnings for a service's wallet -sh scripts/service.sh earnings --name weather-api - -# View service logs -sh scripts/service.sh logs --name weather-api [--tail 100] - -# Health check -sh scripts/service.sh health --name weather-api -``` - -### How `deploy` Works Internally - -``` -1. Validate inputs (handler file exists, chain supported, wallet valid) - -2. Create deployment directory: - $CONFIG_DIR/services// - ├── helmfile.yaml ← generated from template - ├── handler.py ← copied from --handler - └── values.yaml ← generated (price, chain, wallet, etc.) - -3. Run helmfile sync: - helmfile -f $CONFIG_DIR/services//helmfile.yaml sync - - This creates: - - Namespace: agent-svc- - - ConfigMap: handler-code (contains handler.py) - - ConfigMap: svc-metadata (pricing, description, endpoints) - - Deployment: agent-svc- (2 containers: handler + x402 proxy) - - Service: agent-svc- (ClusterIP, port 8402) - - HTTPRoute: agent-svc- (path: /services//*) - -4. Wait for pod ready - -5. If --register flag: - sh scripts/identity.sh --from $WALLET register \ - --uri "ipfs://$(pin metadata.json)" - # Or update existing agent's service endpoints -``` - -### Handler Template (`handler.py.tmpl`) - -The agent gets a minimal scaffold to fill in. No x402 awareness needed — just return HTTP responses. - -```python -#!/usr/bin/env python3 -""" -Agent service handler — {{.Name}} -{{.Description}} - -This runs behind an x402 payment proxy. Requests that reach this -handler have already been paid for. Just return the data. - -Serve on port 8080 (the proxy forwards paid requests here). 
-""" -import json -from http.server import HTTPServer, BaseHTTPRequestHandler - - -class Handler(BaseHTTPRequestHandler): - def do_GET(self): - """Handle GET requests.""" - # TODO: implement your service logic here - data = {"message": "Hello from {{.Name}}"} - - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.end_headers() - self.wfile.write(json.dumps(data).encode()) - - def do_POST(self): - """Handle POST requests.""" - content_length = int(self.headers.get("Content-Length", 0)) - body = self.rfile.read(content_length) if content_length else b"" - - # TODO: process the request body - data = {"received": len(body)} - - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.end_headers() - self.wfile.write(json.dumps(data).encode()) - - def log_message(self, format, *args): - """Structured logging.""" - print(f"[{{.Name}}] {args[0]}") - - -if __name__ == "__main__": - server = HTTPServer(("0.0.0.0", 8080), Handler) - print(f"[{{.Name}}] Serving on :8080") - server.serve_forever() -``` - -### Helmfile Template (`helmfile.yaml.gotmpl`) - -```yaml -releases: - - name: agent-svc-{{ .Values.name }} - namespace: agent-svc-{{ .Values.name }} - createNamespace: true - chart: bedag/raw - version: 2.1.0 - values: - - resources: - # --- Handler code as ConfigMap --- - - apiVersion: v1 - kind: ConfigMap - metadata: - name: handler-code - data: - handler.py: | -{{ .Values.handlerCode | indent 16 }} - - # --- Service metadata for discovery --- - - apiVersion: v1 - kind: ConfigMap - metadata: - name: svc-metadata - labels: - app.kubernetes.io/part-of: obol.stack - obol.stack/app: agent-service - obol.stack/service-name: {{ .Values.name }} - data: - metadata.json: | - { - "name": "{{ .Values.name }}", - "description": "{{ .Values.description }}", - "pricing": { - "pricePerRequest": "{{ .Values.price }}", - "currency": "USDC", - "chain": "{{ .Values.chain }}" - }, - "endpoints": { - "external": "{{ 
.Values.publicURL }}/services/{{ .Values.name }}", - "internal": "http://agent-svc-{{ .Values.name }}.agent-svc-{{ .Values.name }}.svc.cluster.local:8402" - } - } - - # --- Deployment: handler + x402 proxy sidecar --- - - apiVersion: apps/v1 - kind: Deployment - metadata: - name: agent-svc-{{ .Values.name }} - spec: - replicas: 1 - selector: - matchLabels: - app: agent-svc-{{ .Values.name }} - template: - metadata: - labels: - app: agent-svc-{{ .Values.name }} - spec: - containers: - # Handler container — agent's business logic - - name: handler - image: python:3.12-slim - command: ["python3", "/app/handler.py"] - ports: - - containerPort: 8080 - volumeMounts: - - name: handler-code - mountPath: /app - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 5 - - # x402 proxy sidecar — payment verification + settlement - - name: x402-proxy - image: ghcr.io/obolnetwork/inference-gateway:latest - args: - - --listen=:8402 - - --upstream=http://localhost:8080 - - --wallet={{ .Values.wallet }} - - --price={{ .Values.price }} - - --chain={{ .Values.chain }} - - --facilitator={{ .Values.facilitator }} - ports: - - containerPort: 8402 - readinessProbe: - httpGet: - path: /health - port: 8402 - initialDelaySeconds: 5 - periodSeconds: 10 - - volumes: - - name: handler-code - configMap: - name: handler-code - - # --- Service --- - - apiVersion: v1 - kind: Service - metadata: - name: agent-svc-{{ .Values.name }} - spec: - selector: - app: agent-svc-{{ .Values.name }} - ports: - - port: 8402 - targetPort: 8402 - name: x402 - - # --- HTTPRoute (Traefik) --- - - apiVersion: gateway.networking.k8s.io/v1 - kind: HTTPRoute - metadata: - name: agent-svc-{{ .Values.name }} - spec: - parentRefs: - - name: traefik-gateway - namespace: traefik - sectionName: web - rules: - - matches: - - path: - type: PathPrefix - value: /services/{{ .Values.name }} - filters: - - type: URLRewrite - urlRewrite: - path: - type: ReplacePrefixMatch - replacePrefixMatch: / 
- backendRefs: - - name: agent-svc-{{ .Values.name }} - port: 8402 -``` - ---- - -## Integration With Existing Skills - -| Skill | Integration point | -|-------|------------------| -| `agent-identity` | `--register` flag calls `identity.sh register` or `identity.sh set-uri` to advertise the service endpoint in ERC-8004 | -| `local-ethereum-wallet` | Wallet address for x402 payment settlement; `signer.py` for any onchain operations | -| `ethereum-networks` | `rpc.sh` to check USDC balance, query payment transactions, verify settlement | -| `obol-stack` | `kube.py` to monitor service pod health, logs, events | -| `standards` | x402 protocol reference, pricing strategies, facilitator documentation | - ---- - -## RBAC Requirements - -The OpenClaw pod currently has **read-only access to its own namespace**. To deploy services, it needs: - -### Option A: Expand OpenClaw's RBAC (Simple, Less Isolated) - -Add a ClusterRole that lets OpenClaw create resources in `agent-svc-*` namespaces: - -```yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: openclaw-service-deployer -rules: - - apiGroups: [""] - resources: ["namespaces", "configmaps", "services"] - verbs: ["get", "list", "create", "update", "delete"] - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "list", "create", "update", "delete"] - - apiGroups: ["gateway.networking.k8s.io"] - resources: ["httproutes"] - verbs: ["get", "list", "create", "update", "delete"] -``` - -### Option B: Deploy via `obol` CLI (Preferred, Uses Existing Patterns) - -Don't give OpenClaw direct k8s write access. Instead: - -1. `service.sh` writes the helmfile + handler to the **host PVC** (same pattern as skills injection) -2. A lightweight controller or CronJob watches for new service definitions and runs `helmfile sync` -3. 
Or: the agent calls `obol` CLI via the existing passthrough pattern - -**Recommended: Option B** — it follows the existing principle that OpenClaw doesn't mutate cluster state directly. The `obol` binary handles deployment, OpenClaw handles the intent. - -In practice, `service.sh deploy` would: -1. Write helmfile + handler + values to `$DATA_DIR/services//` -2. Call the `obol` CLI wrapper (already available in `$PATH`) to run helmfile sync -3. The `obol` CLI has full kubeconfig access and handles the deployment - -This mirrors how `obol network install` + `obol network sync` work — config is staged, then synced. - ---- - -## Service Lifecycle - -### Deploy -``` -Agent writes handler → service.sh deploy → helmfile sync → pod running → HTTPRoute active → tunnel exposes → ERC-8004 registered -``` - -### Update Handler -``` -Agent edits handler → service.sh update → ConfigMap patched → pod restarted → same URL, new logic -``` - -### Update Price -``` -service.sh set-price → x402 proxy config updated → restarts sidecar only → price change takes effect -``` - -### Teardown -``` -service.sh teardown → helmfile destroy → namespace deleted → ERC-8004 URI updated (mark inactive) -``` - -### Monitor -``` -service.sh earnings → rpc.sh checks USDC balance → shows delta since deployment -service.sh status → pod health + request count + uptime + reputation score -``` - ---- - -## Pricing Strategies (Reference Material) - -The `x402-server-patterns.md` reference would cover: - -### Scheme: `exact` (Live) -Fixed price per request. Simple, predictable. -``` -Price: $0.10 USDC per weather query -Price: $0.001 USDC per data point -``` - -### Scheme: `upto` (Emerging) -Client authorises a maximum, server settles actual cost. 
Critical for metered services: -``` -LLM inference: max $0.50, settle per token generated -Compute jobs: max $1.00, settle per second of runtime -Data queries: max $0.10, settle per row returned -``` - -### Free Tier Pattern -Set price to 0 for discovery/reputation building. Upgrade later: -```bash -# Start free to build reputation -sh scripts/service.sh deploy --name weather-api --handler ./handler.py --price 0 --register - -# After building reputation, add pricing -sh scripts/service.sh set-price --name weather-api --price 0.05 -``` - -### Chain Selection -| Chain | Gas cost per settlement | Best for | -|-------|------------------------|----------| -| Base | ~$0.001 | Consumer services, micropayments | -| Base Sepolia | Free (testnet) | Development, testing | -| Polygon | ~$0.005 | Medium-value services | -| Avalanche | ~$0.01 | Higher-value services | - ---- - -## Implementation Order - -| Phase | Work | Effort | Dependencies | -|-------|------|--------|-------------| -| **1** | Create `agent-services` SKILL.md | Small | None | -| **2** | Create `service.sh` — scaffold + deploy + teardown | Large | Helmfile template | -| **3** | Create helmfile.yaml.gotmpl + handler.py.tmpl | Medium | Inference gateway image | -| **4** | Create `x402-server-patterns.md` reference | Small | None | -| **5** | Add `service.sh` — update, set-price, list, status | Medium | Phase 2 | -| **6** | Add `service.sh` — earnings monitoring, logs, health | Small | Phase 2 | -| **7** | Add `--register` flag (ERC-8004 integration) | Small | `agent-identity` skill | -| **8** | Add RBAC / obol CLI integration for deployment | Medium | Decision on Option A vs B | -| **9** | Test end-to-end: deploy → pay → earn → rate cycle | Large | All phases | - -### Phase 1-4 delivers a working MVP. Phases 5-9 add polish and integration. 
- ---- - -## Validation Criteria - -- [ ] Agent can scaffold a handler template with `service.sh scaffold` -- [ ] Agent can deploy a handler that serves HTTP on a public URL -- [ ] Unauthenticated requests receive `402 Payment Required` with pricing info -- [ ] Paid requests (valid x402 signature) reach the handler and return data -- [ ] Payment settles onchain (USDC transferred to agent's wallet) -- [ ] Agent can update handler code without changing the URL -- [ ] Agent can update pricing without redeploying -- [ ] Agent can tear down a service cleanly -- [ ] Agent can list deployed services with status -- [ ] Agent can check USDC earnings -- [ ] `--register` flag creates/updates ERC-8004 registration with service endpoint -- [ ] Service is discoverable by other agents via ERC-8004 + reputation queries -- [ ] All scripts are POSIX sh, work in the OpenClaw pod -- [ ] Follows existing Obol Stack patterns (helmfile, namespace isolation, Traefik HTTPRoute) - ---- - -## Open Questions - -1. **x402 proxy image:** Reuse `inference-gateway` as-is, or extract a generic `x402-proxy` image? The inference gateway already accepts `--upstream` so it works, but the name is misleading for non-inference services. - -2. **Handler language:** Start with Python-only (stdlib HTTPServer, no dependencies)? Or support a generic Docker image where the agent provides a Dockerfile? - -3. **ConfigMap size limit:** Handler code goes in a ConfigMap (1MB limit). For larger services, should we use the PVC injection pattern instead? 1MB is generous for a Python handler but could be limiting for services with bundled data. - -4. **Multi-endpoint services:** One handler = one service = one price? Or support multiple endpoints with different prices within a single service? The x402 middleware can be configured per-path. - -5. 
**Service discovery by other agents:** Beyond ERC-8004 registration, should there be an in-cluster service registry (ConfigMap-based, like the inference metadata pattern) so co-located agents can discover each other without going onchain? - -6. **Auto-restart on failure:** Should the skill configure liveness probes to auto-restart crashed handlers? The template includes readiness probes but not liveness. - -7. **Rate limiting:** Should there be built-in rate limiting to prevent abuse even with x402 payments? Or is the payment itself sufficient protection? diff --git a/plans/litellmrouting.md b/plans/litellmrouting.md deleted file mode 100644 index f4b731c4..00000000 --- a/plans/litellmrouting.md +++ /dev/null @@ -1,123 +0,0 @@ -# LiteLLM + OpenClaw Smart Routing - -## Context - -When `obol model setup anthropic` adds a cloud provider, OpenClaw can't use the new models because: -1. LiteLLM requires every model to be individually registered in `model_list` -2. OpenClaw's per-agent `models.json` persists stale config (old URLs, old model lists) -3. OpenClaw requires an explicit model allowlist — it does NOT auto-discover from `/v1/models` -4. The sync between LiteLLM config and OpenClaw config is fragile and multi-step - -**Goal**: `obol model setup anthropic` → any Claude model immediately works in OpenClaw. Same for OpenAI. Ollama models work as soon as they're pulled. Direct-to-provider wiring preserved. - -## Approach: Wildcards for Cloud + Explicit for Ollama + Host-Side Patching - -### Why This Approach - -| Feature | LiteLLM | OpenClaw | -|---------|---------|----------| -| `anthropic/*` wildcard | Works | N/A (LiteLLM-side) | -| `openai/*` wildcard | Works | N/A | -| `ollama_chat/*` wildcard | **Broken** | N/A | -| File watcher hot-reload | N/A | **Yes** — hot-applies model changes | - -**Key insight**: LiteLLM wildcards handle cloud routing, but OpenClaw needs an explicit model allowlist. 
We solve this with: (a) wildcards in LiteLLM so any model routes, and (b) writing a clean `models.json` to OpenClaw's host-side PVC which its file watcher picks up. - -### End-to-End Flows - -**`obol model setup anthropic --api-key sk-ant-...`**: -1. LiteLLM gets `anthropic/*` wildcard + API key in Secret → restarts -2. `syncOpenClawModels()` queries running LiteLLM `/v1/models` for actual available models (falls back to baked-in well-known list if cluster unreachable) -3. Writes clean `models.json` to host PVC (replaces entire file) -4. OpenClaw file watcher hot-reloads — Claude models immediately available, no pod restart - -**`obol model setup ollama`** (new models detected): -1. Explicit `ollama_chat/` entries added to LiteLLM (no wildcards) -2. `syncOpenClawModels()` queries LiteLLM, updates `models.json` -3. OpenClaw hot-reloads - -**Direct-to-provider** (`obol openclaw setup` → choose Anthropic direct): -- Unchanged — `buildDirectProviderOverlay()` is a separate code path, no LiteLLM involved - -## Changes - -### 1. LiteLLM: Wildcard entries for cloud providers - -**File**: `internal/model/model.go` — `buildModelEntries()` - -``` -anthropic → wildcard: model_name: "anthropic/*", model: "anthropic/*" - + explicit entries for requested models (better /v1/models) -openai → wildcard: model_name: "openai/*", model: "openai/*" - + explicit entries for requested models -ollama → unchanged (explicit ollama_chat/ entries) -``` - -### 2. LiteLLM: Enable `drop_params: true` - -**File**: `internal/embed/infrastructure/base/templates/llm.yaml` (line 71) - -Cross-provider compatibility — LiteLLM drops unsupported params instead of erroring when routing across providers. - -### 3. Model list: Live query + baked-in fallback - -**File**: `internal/model/model.go` — `GetConfiguredModels()` - -When syncing to OpenClaw: -1. **Try**: Query running LiteLLM pod's `/v1/models` endpoint (with `check_provider_endpoint: true` so wildcards expand to real models) -2. 
**Fallback**: Expand wildcards using baked-in `wellKnownModels` map if cluster unreachable - -```go -var wellKnownModels = map[string][]string{ - "anthropic": {"claude-sonnet-4-6", "claude-opus-4", "claude-sonnet-4-5-20250929", "claude-haiku-3-5-20241022"}, - "openai": {"gpt-4o", "gpt-4o-mini", "o3", "o3-mini"}, -} -``` - -### 4. Host-side `models.json` patching (clean replacement) - -**File**: `internal/openclaw/openclaw.go` — new `patchAgentModelsJSON()` - -Writes a **clean** `models.json` to `$DATA_DIR/openclaw-/openclaw-data/.openclaw/agents/main/agent/models.json`. Replaces entire file — no backward-compatible merge needed (the stale llmspy config never shipped). Contains only the `openai` provider pointing at LiteLLM with the current model list. - -### 5. Update `SyncOverlayModels()` — file watcher only, no helmfile re-sync - -**File**: `internal/openclaw/openclaw.go` - -After patching the overlay YAML, also call `patchAgentModelsJSON()` for each instance. **Skip helmfile re-sync** — OpenClaw's file watcher handles `models.json` changes in <1s. Only do helmfile sync when overlay YAML changes that affect the Helm release (e.g. new provider added, not just model list updates). - -### 6. Add `obol model sync` CLI command - -**File**: `cmd/obol/model.go` - -Manual escape hatch: re-reads LiteLLM config (live query) and pushes to all OpenClaw instances. Useful when new models appear after binary was built. - -### 7. Update `detectProvider()` for wildcards - -**File**: `internal/model/model.go` - -Handle wildcard model names (`anthropic/*`, `openai/*`) in provider detection logic. - -### 8. 
Tests - -- `model_test.go`: wildcard entry generation, wildcard expansion, provider detection for wildcards -- `overlay_test.go`: `models.json` clean write, end-to-end sync - -## Files to Modify - -| File | Changes | -|------|---------| -| `internal/model/model.go` | `buildModelEntries()` wildcards, `GetConfiguredModels()` live query + fallback, `detectProvider()` wildcards, `wellKnownModels` map | -| `internal/openclaw/openclaw.go` | New `patchAgentModelsJSON()`, update `SyncOverlayModels()` to patch models.json + skip helmfile sync | -| `internal/embed/infrastructure/base/templates/llm.yaml` | `drop_params: true` | -| `cmd/obol/model.go` | New `model sync` subcommand | -| `internal/model/model_test.go` | Tests for wildcards | -| `internal/openclaw/overlay_test.go` | Tests for models.json patching | - -## Verification - -1. `go build ./...` + `go test ./...` -2. `obol model setup anthropic --api-key sk-ant-...` → LiteLLM has `anthropic/*` → OpenClaw `models.json` has Claude models → inference works -3. `obol model setup ollama` → new models appear in OpenClaw -4. `obol model sync` → refreshes all instances from live LiteLLM -5. `obol openclaw setup` → direct Anthropic → still works (no LiteLLM) diff --git a/plans/monetise.md b/plans/monetise.md deleted file mode 100644 index 118eaeec..00000000 --- a/plans/monetise.md +++ /dev/null @@ -1,480 +0,0 @@ -# Obol Agent: Autonomous Compute Monetization - -**Branch:** `feat/secure-enclave-inference` | **Date:** 2026-02-25 | **Status:** Architecture proposal - ---- - -## 1. The Goal - -A singleton OpenClaw instance — the **obol-agent** — deployed via `obol agent init`, autonomously monetizes compute resources running in the Obol Stack. A user (or the frontend) declares *what* to expose via a Custom Resource; the obol-agent handles *everything else*: model pulling, health validation, payment gating, public exposure, on-chain registration, and status reporting. - -No separate controller binary. No Go operator. 
The obol-agent is a regular OpenClaw instance with elevated RBAC and the `monetize` skill. Only one obol-agent can exist per cluster; other OpenClaw instances retain standard read-only access. - ---- - -## 2. How It Works - -``` - ┌──────────────────────────────────┐ - │ User / Frontend / obol CLI │ - │ │ - │ kubectl apply -f offer.yaml │ - │ OR: frontend POST to k8s API │ - │ OR: obol sell http ... │ - └──────────┬───────────────────────────┘ - │ creates CR - ▼ - ┌────────────────────────────────────┐ - │ ServiceOffer CR │ - │ apiVersion: obol.network/v1alpha1 │ - │ kind: ServiceOffer │ - └──────────┬───────────────────────────┘ - │ read by - ▼ - ┌────────────────────────────────────┐ - │ obol-agent (singleton OpenClaw) │ - │ namespace: openclaw- │ - │ │ - │ Cron job (every 60s): │ - │ python3 monetize.py process --all│ - │ │ - │ `monetize` skill: │ - │ 1. Read ServiceOffer CRs │ - │ 2. Pull model (if runtime=ollama) │ - │ 3. Health-check upstream service │ - │ 4. Create ForwardAuth Middleware │ - │ 5. Create HTTPRoute │ - │ 6. Register on ERC-8004 │ - │ 7. Update CR status │ - └────────────────────────────────────┘ -``` - -The obol-agent uses its mounted ServiceAccount token to talk to the Kubernetes API — the same pattern `kube.py` already uses for read-only monitoring, but extended with write operations for Middleware and HTTPRoute resources. - -The reconciliation loop is built on OpenClaw's native **cron system**: a `{ kind: "every", everyMs: 60000 }` job runs `monetize.py process --all` every 60 seconds. No sidecar, no K8s CronJob — the cron scheduler runs inside the OpenClaw Gateway process and persists across pod restarts. - ---- - -## 3. 
Why Not a Separate Controller - -| Concern | Go operator (controller-runtime) | OpenClaw with `monetize` skill | -|---------|----------------------------------|--------------------------------| -| New binary to build/maintain | Yes — new cmd/, Dockerfile, CI | No — skill is a SKILL.md + Python script | -| Hot-updatable logic | No — rebuild + redeploy image | Yes — update skill files on PVC | -| Error handling | Hardcoded retry/backoff | AI reasons about failures, adapts | -| Watch loop | Built-in informer cache | Built-in cron: `monetize.py process --all` every 60s | -| Dependencies | controller-runtime, kubebuilder, code-gen | stdlib Python (`urllib`, `json`, `ssl`) | -| Existing infrastructure | Needs new Deployment, SA, RBAC | Uses existing OpenClaw pod, SA, skill system | - -The traditional operator pattern is the right answer when you need guaranteed sub-second reconciliation with leader election. For monetization lifecycle (deploy → expose → register → monitor), OpenClaw acting on ServiceOffer CRs via skills is simpler and leverages everything already built. - ---- - -## 4. 
The CRD - -```yaml -apiVersion: obol.network/v1alpha1 -kind: ServiceOffer -metadata: - name: qwen-inference - namespace: openclaw-default # lives alongside the OpenClaw instance -spec: - # What to serve - model: - name: Qwen/Qwen3.5-35B-A3B # Ollama model tag to pull - runtime: ollama # runtime that serves the model - - # Upstream service (Ollama already running in-cluster) - upstream: - service: ollama # k8s Service name - namespace: openclaw-default # where the service runs - port: 11434 - healthPath: /api/tags # endpoint to probe after pull - - # How to price it - pricing: - amount: "0.50" - unit: MTok # per million tokens - currency: USDC - chain: base - - # Who gets paid - wallet: "0x1234...abcd" - - # Public path - path: /services/qwen-inference - - # On-chain advertisement - register: true -``` - -```yaml -status: - conditions: - - type: ModelReady - status: "True" - reason: PullCompleted - message: "Qwen/Qwen3.5-35B-A3B pulled and loaded on ollama" - - type: UpstreamHealthy - status: "True" - reason: HealthCheckPassed - message: "Model responds to inference at ollama.openclaw-default.svc:11434" - - type: PaymentGateReady - status: "True" - reason: MiddlewareCreated - message: "ForwardAuth middleware x402-qwen-inference created" - - type: RoutePublished - status: "True" - reason: HTTPRouteCreated - message: "Exposed at /services/qwen-inference via traefik-gateway" - - type: Registered - status: "True" - reason: ERC8004Registered - message: "Registered on Base (tx: 0xabc...)" - - type: Ready - status: "True" - reason: AllConditionsMet - endpoint: "https://stack.example.com/services/qwen-inference" - observedGeneration: 1 -``` - -**Design:** -- **Namespace-scoped** — the CR lives in the same namespace as the upstream service. This preserves OwnerReference cascade (garbage collection on delete) and avoids cross-namespace complexity. 
The obol-agent's ClusterRoleBinding lets it watch ServiceOffers across all namespaces via `GET /apis/obol.network/v1alpha1/serviceoffers` (cluster-wide list). -- **Conditions, not Phase** — [deprecated by API conventions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties). Conditions give granular insight into which step failed. -- **Status subresource** — prevents users from accidentally overwriting status. ([docs](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#status-subresource)) -- **Same-namespace as upstream** — the Middleware and HTTPRoute are created alongside the upstream service. OwnerReferences work (same namespace), so deleting the ServiceOffer garbage-collects the route and middleware. ([docs](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/)) - -### CRD installation - -The CRD manifest is embedded in the infrastructure helmfile (same pattern as `obol-agent.yaml`) and applied during `obol stack init`. No kubebuilder, no code-gen — just a static YAML manifest. - ---- - -## 5. The `monetize` Skill - -``` -internal/embed/skills/monetize/ -├── SKILL.md # Teaches OpenClaw when and how to use this skill -├── scripts/ -│ └── monetize.py # K8s API client for ServiceOffer lifecycle -└── references/ - └── x402-pricing.md # Pricing strategies, chain selection -``` - -### SKILL.md (summary) - -Teaches OpenClaw: -- When a user asks to monetize a service, create a ServiceOffer CR -- When asked to check monetization status, read ServiceOffer CRs and report conditions -- When asked to process offers, run the monetization workflow (health → gate → route → register) -- When asked to stop monetizing, delete the ServiceOffer CR (garbage collection handles cleanup) - -### kube.py extension - -`kube.py` gains write helpers (`api_post`, `api_patch`, `api_delete`) alongside its existing `api_get`. 
The read-only contract is preserved by convention: `kube.py` commands remain read-only; `monetize.py` imports the shared helpers and adds write operations. Pure Python stdlib — no new dependencies. - -Why not a K8s MCP server? The mounted ServiceAccount token already gives direct API access. An MCP server (e.g., Red Hat's `containers/kubernetes-mcp-server`) adds a sidecar container, image pull, and Helm chart changes for what amounts to wrapping the same REST calls. It's a known upgrade path if K8s operations outgrow script-based tooling, but adds no value today. - -### monetize.py - -``` -python3 monetize.py offers # list ServiceOffer CRs -python3 monetize.py process <name> # run full workflow for one offer -python3 monetize.py process --all # process all pending offers -python3 monetize.py status <name> # show conditions -python3 monetize.py create --upstream .. # create a ServiceOffer CR -python3 monetize.py delete <name> # delete CR (cascades cleanup) -``` - -Each `process` invocation: - -1. **Read the ServiceOffer CR** from the k8s API -2. **Pull the model** — if `spec.model.runtime == ollama`, `POST /api/pull` to Ollama -3. **Health-check** — verify model responds at `<service>.<namespace>.svc:<port>` -4. **Create/update Middleware** — Traefik ForwardAuth pointing at `x402-verifier.x402.svc:8080/verify` -5. **Create/update HTTPRoute** — `parentRef: traefik-gateway`, path from spec, backend = upstream service, filter = the Middleware -6. **ERC-8004 registration** — if `spec.register`, call `signer.py` to sign and submit the registration tx -7. **Update CR status** — set conditions and endpoint - -All via the k8s REST API using the mounted ServiceAccount token. No kubectl, no client-go, no external dependencies. - ---- - -## 6. What Gets Created Per ServiceOffer - -All resources are created in the **same namespace** as the upstream service (and the ServiceOffer CR). OwnerReferences on the ServiceOffer handle cleanup. 
- -| Resource | Purpose | -|----------|---------| -| `Middleware` (traefik.io/v1alpha1) | ForwardAuth to `x402-verifier.x402.svc:8080/verify` — gates the upstream with payment | -| `HTTPRoute` (gateway.networking.k8s.io/v1) | Routes `spec.path` from Traefik Gateway to upstream, through the Middleware | - -That's it. Two resources. The upstream service already runs. The x402 verifier already runs. The Gateway already runs. The tunnel already runs. - -### Why no new namespace - -The upstream service already has a namespace. Creating a new namespace per offer would mean: -- Cross-namespace OwnerReferences don't work ([docs](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/)) -- Need ReferenceGrant for cross-namespace backend refs in HTTPRoute ([docs](https://gateway-api.sigs.k8s.io/api-types/referencegrant/)) -- Broader RBAC (namespace create/delete permissions) - -Instead: Middleware and HTTPRoute live alongside the upstream. Delete the ServiceOffer CR → Kubernetes cascades the deletion. - -### Cross-namespace HTTPRoute → Gateway - -The HTTPRoute references `traefik-gateway` in the `traefik` namespace. No ReferenceGrant needed — the Gateway's `allowedRoutes.namespaces.from: All` handles this. ([Gateway API docs](https://gateway-api.sigs.k8s.io/guides/multiple-ns/)) - -### Middleware locality - -Traefik's `ExtensionRef` in HTTPRoute is a `LocalObjectReference` — Middleware must be in the same namespace as the HTTPRoute. The skill creates it there. ([traefik#11126](https://github.com/traefik/traefik/issues/11126)) - ---- - -## 7. 
RBAC: Singleton obol-agent vs Regular OpenClaw - -### Two tiers of access - -| | obol-agent (singleton) | Regular OpenClaw instances | -|---|---|---| -| **Deployed by** | `obol agent init` | `obol openclaw onboard` | -| **RBAC** | `openclaw-monetize` ClusterRole | Namespace-scoped read-only Role (chart default) | -| **Skills** | All default skills + `monetize` | Default skills only | -| **Cron** | `monetize.py process --all` every 60s | No monetization cron | -| **Count** | Exactly one per cluster | Zero or more | - -Only the obol-agent gets the elevated ClusterRole. `obol agent init` enforces the singleton constraint — it refuses to create a second obol-agent if one already exists. - -### obol-agent ClusterRole - -```yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: openclaw-monetize -rules: - # Read/write ServiceOffer CRs - - apiGroups: ["obol.network"] - resources: ["serviceoffers"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - - apiGroups: ["obol.network"] - resources: ["serviceoffers/status"] - verbs: ["get", "update", "patch"] - - # Create Middleware and HTTPRoute in service namespaces - - apiGroups: ["traefik.io"] - resources: ["middlewares"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - - apiGroups: ["gateway.networking.k8s.io"] - resources: ["httproutes"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - - # Read pods/services/endpoints/deployments for health checks (any namespace) - - apiGroups: [""] - resources: ["pods", "services", "endpoints"] - verbs: ["get", "list"] - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "list"] - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get"] -``` - -This is bound to OpenClaw's ServiceAccount via ClusterRoleBinding — the skill needs to read services and create routes across namespaces (e.g., check health of Ollama in `openclaw-default`, create a route for an Ethereum node in 
`ethereum-knowing-wahoo`). - -### What is explicitly NOT granted - -| Excluded | Why | -|----------|-----| -| `secrets` (cluster-wide) | OpenClaw has secrets access in its own namespace only (chart default) | -| `rbac.authorization.k8s.io/*` | Cannot modify its own permissions | -| `namespaces` create/delete | Doesn't create namespaces | -| `deployments` create/update | Doesn't create workloads — gates existing ones | -| `configmaps` create (cluster-wide) | Reads config for diagnostics, doesn't write it | - -### How this gets applied - -The ClusterRole and ClusterRoleBinding are added to the OpenClaw helmfile generation in `internal/openclaw/openclaw.go`, same as the existing `rbac.create: true` overlay. When `obol openclaw onboard` runs, the chart deploys these RBAC resources alongside the pod. - -**Ref:** [RBAC Good Practices](https://kubernetes.io/docs/concepts/security/rbac-good-practices/) - -### Fix the existing `admin` RoleBinding - -The per-network `agent-rbac.yaml` currently binds the `admin` ClusterRole, which includes Secrets and RBAC manipulation. Replace with a scoped ClusterRole (read pods/services + write Middleware/HTTPRoute). - ---- - -## 8. 
Admission Policy Guardrail - -Defense-in-depth via [ValidatingAdmissionPolicy](https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/) (GA in k8s 1.30, available in k3s 1.31): - -```yaml -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingAdmissionPolicy -metadata: - name: openclaw-monetize-guardrail -spec: - failurePolicy: Fail - matchConstraints: - resourceRules: - - apiGroups: ["traefik.io"] - apiVersions: ["v1alpha1"] - operations: ["CREATE", "UPDATE"] - resources: ["middlewares"] - - apiGroups: ["gateway.networking.k8s.io"] - apiVersions: ["v1"] - operations: ["CREATE", "UPDATE"] - resources: ["httproutes"] - matchConditions: - - name: is-openclaw - expression: >- - request.userInfo.username.startsWith("system:serviceaccount:openclaw-") - validations: - # HTTPRoutes must reference traefik-gateway only - - expression: >- - object.spec.parentRefs.all(ref, - ref.name == "traefik-gateway" && ref.?namespace.orValue("traefik") == "traefik" - ) - message: "OpenClaw can only attach routes to traefik-gateway" - # Middlewares must use ForwardAuth to x402-verifier only - - expression: >- - !has(object.spec.forwardAuth) || - object.spec.forwardAuth.address.startsWith("http://x402-verifier.x402.svc") - message: "ForwardAuth must point to x402-verifier" -``` - -Even if RBAC allows creating any Middleware, the admission policy ensures OpenClaw can only create ForwardAuth rules pointing at the legitimate x402 verifier. A prompt injection can't make it route traffic to an attacker-controlled auth endpoint. - ---- - -## 9. The Full Flow - -``` -1. User: "Monetize Qwen3.5-35B-A3B on Ollama at $0.50 per M token on Base" - -2. OpenClaw (using monetize skill) creates the ServiceOffer CR: - python3 monetize.py create qwen-inference \ - --model Qwen/Qwen3.5-35B-A3B --runtime ollama \ - --upstream ollama --namespace openclaw-default --port 11434 \ - --price 0.50 --unit MTok --chain base --wallet 0x... 
--register - → Creates ServiceOffer CR via k8s API - -3. OpenClaw processes the offer: - python3 monetize.py process qwen-inference - - Step 1: Pull the model through Ollama - POST http://ollama.openclaw-default.svc:11434/api/pull - {"name": "Qwen/Qwen3.5-35B-A3B"} - → Streams download progress, waits for completion - → sets condition: ModelReady=True - - Step 2: Health-check the model is loaded - POST http://ollama.openclaw-default.svc:11434/api/generate - {"model": "Qwen/Qwen3.5-35B-A3B", "prompt": "ping", "stream": false} - → 200 OK, model responds - → sets condition: UpstreamHealthy=True - - Step 3: Create ForwardAuth Middleware - POST /apis/traefik.io/v1alpha1/namespaces/openclaw-default/middlewares - → ForwardAuth → x402-verifier.x402.svc:8080/verify - → sets condition: PaymentGateReady=True - - Step 4: Create HTTPRoute - POST /apis/gateway.networking.k8s.io/v1/namespaces/openclaw-default/httproutes - → parentRef: traefik-gateway, path: /services/qwen-inference - → filter: ExtensionRef to Middleware - → backendRef: ollama:11434 - → sets condition: RoutePublished=True - - Step 5: ERC-8004 registration - python3 signer.py ... (signs registration tx) - → sets condition: Registered=True - - Step 6: Update status - PATCH /apis/obol.network/v1alpha1/.../serviceoffers/qwen-inference/status - → Ready=True, endpoint=https://stack.example.com/services/qwen-inference - -4. User: "What's the status?" - python3 monetize.py status qwen-inference - → Shows conditions table + endpoint + model info - -5. External consumer pays and calls: - POST https://stack.example.com/services/qwen-inference/v1/chat/completions - X-Payment: - → Traefik → ForwardAuth (x402-verifier) → Ollama (Qwen3.5-35B-A3B) -``` - ---- - -## 10. 
What the `obol` CLI Does - -The CLI becomes a thin CRD client — no deployment logic, no helmfile: - -```bash -obol sell http --upstream ollama --price 0.001 --chain base -# → creates ServiceOffer CR (same as kubectl apply) - -obol sell list -# → kubectl get serviceoffers (formatted) - -obol sell status qwen-inference -# → shows conditions, endpoint, pricing - -obol sell delete qwen-inference -# → deletes CR (OwnerReference cascades cleanup) -``` - -The frontend can do the same via the k8s API directly. - ---- - -## 11. What We Keep, What We Drop, What We Add - -| Component | Action | Reason | -|-----------|--------|--------| -| `cmd/x402-verifier/` | **Keep** | ForwardAuth verifier — the payment gate | -| `internal/x402/` | **Keep** | Verifier handler | -| `internal/erc8004/` | **Keep** | On-chain registration (called by `monetize.py` via `signer.py`) | -| `internal/enclave/` | **Keep** | Secure Enclave signing (orthogonal to monetization) | -| `internal/inference/gateway.go` | **Drop** | Inline x402 middleware — replaced by ForwardAuth | -| `internal/inference/store.go` | **Drop** | Deployment config on disk — replaced by CRD | -| `obol-agent.yaml` (busybox pod) | **Drop** | OpenClaw IS the agent; no separate placeholder pod | -| `agent-rbac.yaml` (`admin` binding) | **Replace** | Scoped ClusterRole instead of `admin` | -| `cmd/obol/service.go` | **Simplify** | Thin CRD client | -| `cmd/obol/monetize.go` | **Simplify** | Thin CRD client | -| `internal/embed/skills/monetize/` | **Add** | New skill: SKILL.md + `monetize.py` + references | -| ServiceOffer CRD manifest | **Add** | Intent interface, applied during `obol stack init` | -| ValidatingAdmissionPolicy | **Add** | Guardrail on what OpenClaw can create | -| `openclaw-monetize` ClusterRole | **Add** | Scoped write access for Middleware/HTTPRoute | - ---- - -## 12. 
Resolved Decisions - -| Question | Decision | Rationale | -|----------|----------|-----------| -| **Polling vs event-driven** | OpenClaw cron job, every 60s | OpenClaw has a built-in cron scheduler (`{ kind: "every", everyMs: 60000 }`). No sidecar, no K8s CronJob — runs inside the Gateway process. Jobs persist across restarts via `~/.openclaw/cron/jobs.json`. | -| **Multi-instance** | Singleton obol-agent | Only one obol-agent per cluster, enforced by `obol agent init`. Other OpenClaw instances keep read-only RBAC and no `monetize` skill. No coordination problem. | -| **CRD scope** | Namespace-scoped | OwnerReference cascade works (same namespace as Middleware/HTTPRoute). The obol-agent's ClusterRoleBinding lets it list ServiceOffers across all namespaces. Standard `kubectl get serviceoffers -A` works. | -| **K8s API access** | Extend `kube.py` with write helpers | `kube.py` gains `api_post`, `api_patch`, `api_delete` alongside `api_get`. `monetize.py` imports the shared helpers. Pure stdlib, zero new dependencies. K8s MCP server (Red Hat `containers/kubernetes-mcp-server`) is a known upgrade path but unnecessary today. 
| - ---- - -## References - -| Topic | Link | -|-------|------| -| Custom Resource Definitions | https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/ | -| CRD status subresource | https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#status-subresource | -| API conventions (conditions) | https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md | -| RBAC | https://kubernetes.io/docs/reference/access-authn-authz/rbac/ | -| RBAC good practices | https://kubernetes.io/docs/concepts/security/rbac-good-practices/ | -| ValidatingAdmissionPolicy | https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/ | -| OwnerReferences | https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/ | -| Cross-namespace routing (Gateway API) | https://gateway-api.sigs.k8s.io/guides/multiple-ns/ | -| ReferenceGrant | https://gateway-api.sigs.k8s.io/api-types/referencegrant/ | -| Accessing API from a pod | https://kubernetes.io/docs/tasks/run-application/access-api-from-pod/ | -| Pod Security Standards | https://kubernetes.io/docs/concepts/security/pod-security-standards/ | -| Service account tokens | https://kubernetes.io/docs/concepts/security/service-accounts/ | -| Traefik ForwardAuth | https://doc.traefik.io/traefik/reference/routing-configuration/http/middlewares/forwardauth/ | -| Traefik Middleware locality | https://github.com/traefik/traefik/issues/11126 | diff --git a/plans/skills-host-path-injection-v3.md b/plans/skills-host-path-injection-v3.md deleted file mode 100644 index 4c54228d..00000000 --- a/plans/skills-host-path-injection-v3.md +++ /dev/null @@ -1,120 +0,0 @@ -# Skills Host-Path Injection v3 - -## Problem - -The ConfigMap-based skill injection (tar → kubectl create configmap → init container extraction → rollout restart) is fragile, complex, and failed in practice. We need a simpler approach. 
- -## Solution - -Write embedded skills directly to the host filesystem path that maps to `/data/.openclaw/skills/` inside the OpenClaw container. This is the native skills directory that OpenClaw watches with a file watcher. No ConfigMap, no init container, no restart needed. - -## Key Discovery: Volume Mount Chain - -``` -HOST $DATA_DIR - → k3d volume mount → /data on all k3d nodes - → local-path-provisioner → /data/<namespace>/<pvc-name>/ - → PVC mount in container → /data -``` - -- **PVC name** (from chart): `openclaw-data` -- **Namespace**: `openclaw-<id>` (e.g. `openclaw-default`) -- **Container mount**: `/data` (persistence.mountPath) -- **State dir**: `/data/.openclaw` (OPENCLAW_STATE_DIR env) -- **Native skills dir watched by OpenClaw**: `/data/.openclaw/skills/` - -## Host Path Formula - -``` -$DATA_DIR / openclaw-<id> / openclaw-data / .openclaw / skills / -``` - -| Mode | Concrete Path | -|------|---------------| -| **Dev** | `.workspace/data/openclaw-<id>/openclaw-data/.openclaw/skills/` | -| **Prod** | `~/.local/share/obol/openclaw-<id>/openclaw-data/.openclaw/skills/` | - -## Implementation Steps - -### 1. Add `skillsVolumePath()` helper - -Returns the host-side path to `/data/.openclaw/skills/` inside the PVC. - -```go -func skillsVolumePath(cfg *config.Config, id string) string { - namespace := fmt.Sprintf("%s-%s", appName, id) - return filepath.Join(cfg.DataDir, namespace, "openclaw-data", ".openclaw", "skills") -} -``` - -### 2. Add `injectSkillsToVolume()` function - -Copies staged skills from config dir directly to the host PVC path. -Called BEFORE helmfile sync so skills are present at first pod boot. - -### 3. Rewrite `SkillsSync()` for runtime use - -`obol openclaw skills sync --from <dir>` now copies to host path instead of creating ConfigMap. - -### 4. Remove old ConfigMap machinery from `doSync()` - -- Remove `ensureNamespaceExists()` call (only existed for pre-creating ConfigMap) -- Remove `syncStagedSkills()` call -- Replace with `injectSkillsToVolume()` call - -### 5. 
Disable chart skills feature in overlay - -Change overlay from: -```yaml -skills: - enabled: true - createDefault: false -``` -To: -```yaml -skills: - enabled: false -``` - -This removes the init container, ConfigMap volume, and `skills.load.extraDirs` config entirely. OpenClaw uses its native file watcher on `/data/.openclaw/skills/`. - -### 6. Update `copyWorkspaceToPod()` to use host path - -Same pattern — write directly to `$DATA_DIR/openclaw-/openclaw-data/.openclaw/workspace/` instead of kubectl cp. - -## Revised Data Flow - -``` -Embedded skills (internal/embed/skills/) - │ stageDefaultSkills() - ▼ -$CONFIG_DIR/applications/openclaw//skills/ ← staged source - │ injectSkillsToVolume() - ▼ -$DATA_DIR/openclaw-/openclaw-data/.openclaw/skills/ ← host PVC path - │ k3d volume mount - ▼ -Container: /data/.openclaw/skills/ ← native watched dir - │ OpenClaw file watcher - ▼ -Skills loaded ✓ -``` - -## Revised `doSync()` Flow - -**Before**: ensureNamespace → stageSkills → syncStagedSkills(ConfigMap) → helmfile sync → copyWorkspaceToPod(kubectl cp) - -**After**: stageSkills → injectSkillsToVolume(host path) → helmfile sync → copyWorkspaceToVolume(host path) - -## Files Modified - -- `internal/openclaw/openclaw.go` — all changes -- `internal/openclaw/overlay_test.go` — update expected overlay output - -## What Gets Deleted - -- `syncStagedSkills()` function -- ConfigMap creation logic in `SkillsSync()` (rewritten for host-path) -- `ensureNamespaceExists()` call in `doSync()` (before helmfile sync) -- `skills.enabled: true` / `skills.createDefault: false` from overlay -- tar archiving, kubectl delete/create configmap, rollout restart diff --git a/plans/skills-system-redesign-v2.md b/plans/skills-system-redesign-v2.md deleted file mode 100644 index be6fc0ac..00000000 --- a/plans/skills-system-redesign-v2.md +++ /dev/null @@ -1,253 +0,0 @@ -# Skills System Redesign v2 — Final Implementation Record - -> Distilled from v1 notes + Opus analysis. All open questions resolved. 
Implementation complete. -> The original `skills-system-redesign.md` is preserved as-is for reference. - ---- - -## Guiding Principles - -1. **Stock openclaw feel** — the user should not notice they're in a k8s pod. Lean on native openclaw CLI for skill management. -2. **Don't overengineer** — no custom registries, no git sparse-checkout, no lock files for MVP. Ship the simplest thing that works. -3. **Two delivery channels**: compile-time (embedded in obol binary, staged to host, pushed as ConfigMap) and runtime (`kubectl exec` running native openclaw-cli in-pod). -4. **Smart default resolution** — 0 instances: prompt setup. 1 instance: assume it. 2+ instances: require name. - ---- - -## Architecture - -``` - ┌─────────────────────────────┐ - │ obol CLI binary │ - │ (embedded SKILL.md files) │ - └────────────┬────────────────┘ - │ - ┌────────────────────────┼────────────────────────┐ - │ │ │ - ┌────────▼────────┐ ┌─────────▼─────────┐ ┌─────────▼─────────┐ - │ obol openclaw │ │ obol openclaw │ │ obol openclaw │ - │ onboard / sync │ │ skills add/remove │ │ skills list │ - │ (compile-time) │ │ (runtime) │ │ (runtime) │ - └────────┬────────┘ └─────────┬─────────┘ └─────────┬─────────┘ - │ │ │ - │ stageDefaultSkills │ kubectl exec │ kubectl exec - │ → host config dir │ -c openclaw │ -c openclaw - │ syncStagedSkills │ openclaw skills add │ openclaw skills list - │ → ConfigMap │ (native openclaw CLI) │ (native openclaw CLI) - │ │ │ - └────────────────────────┼────────────────────────┘ - │ - ┌────────▼────────┐ - │ OpenClaw Pod │ - │ ConfigMap mount │ - │ + PVC-backed │ - │ ~/.openclaw/ │ - │ skills/ │ - └─────────────────┘ -``` - -### How skills reach the pod - -| Channel | Mechanism | When | Persistence | -|---------|-----------|------|-------------| -| **Compile-time** (Obol defaults) | Embedded → staged to `$CONFIG_DIR/.../skills/` → pushed as ConfigMap via `SkillsSync()` | Every `doSync()` (onboard and sync) | ConfigMap — chart mounts it | -| **Runtime add/remove** 
| `kubectl exec -c openclaw deploy/openclaw -- node openclaw.mjs skills add ` | User runs `obol openclaw skills add ...` | PVC — survives restarts | -| **Runtime list** | `kubectl exec -c openclaw deploy/openclaw -- node openclaw.mjs skills list` | User runs `obol openclaw skills list` | Read-only | - -### Why ConfigMap over kubectl cp - -The initial implementation used `kubectl cp` to copy skills directly into the pod. This required the pod to be Running, which fails on first deploy when the image pull takes >60s. The ConfigMap approach: -- Works without waiting for the pod (namespace is sufficient) -- Skills are available when the pod starts (chart's init container extracts them) -- Self-healing: `doSync()` stages defaults if missing, pushes every sync -- The host-path PV backing each PVC remains a fallback if ConfigMap hits limits - ---- - -## Part 1: Default Instance Resolution - -### Implementation: `internal/openclaw/resolve.go` - -```go -func ResolveInstance(cfg *config.Config, args []string) (id string, remaining []string, err error) -func ListInstanceIDs(cfg *config.Config) ([]string, error) -``` - -- **0 instances** → error: `no OpenClaw instances found — run 'obol agent init' to create one` -- **1 instance** → auto-select, return args unchanged -- **2+ instances** → consume `args[0]` if it matches an instance name, else error listing all - -Wired into all subcommands: `sync`, `setup`, `delete`, `token`, `dashboard`, `cli`, `skills`. - -Not needed for: `onboard` (creates new), `list` (shows all). - -### Tests: `internal/openclaw/resolve_test.go` - -9 unit tests covering all 0/1/2+ scenarios, including edge cases (no args, unknown name). 
- ---- - -## Part 2: Compile-Time Skills (Default Obol Skills) - -### What we embed - -``` -internal/embed/skills/ -├── hello/ -│ └── SKILL.md -└── ethereum/ - └── SKILL.md -``` - -### Delivery (two-stage: stage on host, push as ConfigMap) - -**Stage 1 — `stageDefaultSkills(deploymentDir)`** (called during `Onboard()` before sync, and inside `doSync()` for self-healing): - -- Writes embedded skills to `$CONFIG_DIR/applications/openclaw//skills/` -- **Skips** if `skills/` directory already exists (user customisation takes precedence) - -**Stage 2 — `syncStagedSkills(cfg, id, deploymentDir)`** (called inside `doSync()` after helmfile sync): - -- Checks `skills/` dir has subdirectories -- Calls existing `SkillsSync()` to package into ConfigMap `openclaw--skills` -- Chart's `extract-skills` init container unpacks it on pod (re)start - -**Self-healing**: `doSync()` calls `stageDefaultSkills()` before `syncStagedSkills()`. Instances created before the skills feature get defaults on their next sync. - -### Files - -| File | Status | -|------|--------| -| `internal/embed/skills/hello/SKILL.md` | Created | -| `internal/embed/skills/ethereum/SKILL.md` | Created | -| `internal/embed/embed.go` | Modified — `skillsFS`, `CopySkills()`, `GetEmbeddedSkillNames()` | -| `internal/openclaw/openclaw.go` | Modified — `stageDefaultSkills()`, `syncStagedSkills()`, wired into `Onboard()` + `doSync()` | - ---- - -## Part 3: Runtime Skill Management (`obol openclaw skills`) - -### CLI structure - -``` -obol openclaw skills [instance-name] -├── add → kubectl exec -c openclaw ... node openclaw.mjs skills add -├── remove → kubectl exec -c openclaw ... node openclaw.mjs skills remove -├── list → kubectl exec -c openclaw ... 
node openclaw.mjs skills list -└── sync --from → packages local dir as ConfigMap (existing SkillsSync mechanism) -``` - -### Implementation - -Thin wrappers in `internal/openclaw/openclaw.go`: - -```go -func SkillAdd(cfg, id, args) → cliViaKubectlExec(cfg, ns, ["skills", "add", ...args]) -func SkillRemove(cfg, id, args) → cliViaKubectlExec(cfg, ns, ["skills", "remove", ...args]) -func SkillList(cfg, id) → cliViaKubectlExec(cfg, ns, ["skills", "list"]) -``` - -`cliViaKubectlExec` uses `-c openclaw` to explicitly target the main container (pod has an `extract-skills` init container that confuses the default container selection). - -### Files - -| File | Status | -|------|--------| -| `cmd/obol/openclaw.go` | Modified — `skills` subcommand group with `add`, `remove`, `list`, `sync` | -| `internal/openclaw/openclaw.go` | Modified — `SkillAdd()`, `SkillRemove()`, `SkillList()` | - ---- - -## Part 4: CLI Structure (Final) - -``` -obol openclaw -├── onboard [--id ] [--force] [--no-sync] -├── sync [instance-name] -├── setup [instance-name] -├── list -├── delete [instance-name] -├── token [instance-name] -├── dashboard [instance-name] -├── cli [instance-name] [-- ] -└── skills [instance-name] - ├── add - ├── remove - ├── list - └── sync --from -``` - -All subcommands (except `onboard` and `list`) auto-resolve the instance when only one exists. - ---- - -## Part 5: Default Obol Skill Content - -### `hello` (SKILL.md) - -Smoke test. Says hello when invoked, confirms skills are loaded. - -### `ethereum` (SKILL.md) - -Ethereum JSON-RPC access via eRPC. Key details: -- Base URL: `http://erpc.erpc.svc.cluster.local:4000` -- Discovery: `GET /` returns config with connected networks -- RPC pattern: `POST /rpc/` with standard JSON-RPC -- Read-only: no write transactions -- Common methods: `eth_blockNumber`, `eth_syncing`, `eth_getBalance`, `eth_call`, `eth_chainId`, etc. 
- ---- - -## Decisions Made (resolving v1 open questions) - -| Question | Decision | Rationale | -|---|---|---| -| ConfigMap 1MB limit | **Not a concern for MVP** — text SKILL.md files are tiny | Can switch to PVC host-path if needed | -| Skill dependencies | **No** | Skills are independent instruction files | -| Private repo support | **Deferred** — `kubectl exec openclaw skills add` handles natively | Pod fetches from wherever openclaw-cli can | -| Helm chart init container | **Already exists** — `extract-skills` init container unpacks ConfigMap | No chart changes needed | -| Skill validation | **No** — trust skill author | Broken skills just won't work | -| Community skill registry | **Not for MVP** | GitHub repos are sufficient | -| Lock file | **Not for MVP** | Skills are embedded (versioned with binary) or runtime-added | -| GitHub fetching in obol CLI | **Not for MVP** | openclaw-cli in pod does this natively | -| Skill naming | **Plain names** — `hello`, `ethereum` | No `@obol/` prefix needed | -| Sandboxed skills | **Not for MVP** | Docker-in-k8s-in-Docker is fragile | -| Host-path PV for skills | **Fallback option** | Every PVC gets a hostPath PV; can write directly if ConfigMap hits limits | -| `skill` vs `skills` | **`skills` (plural)** | Matches openclaw-cli convention (`node openclaw.mjs skills ...`) | -| kubectl cp vs ConfigMap | **ConfigMap** | No pod readiness dependency; self-healing on every sync | -| Container targeting | **`-c openclaw` explicit** | Pod has `extract-skills` init container; must target main container | - ---- - -## What We Built - -1. **`ResolveInstance()`** — smart instance selection (0/1/2+ logic) for all openclaw subcommands -2. **2 embedded SKILL.md files** — `hello`, `ethereum` -3. **`stageDefaultSkills()` + `syncStagedSkills()`** — two-stage delivery: host staging → ConfigMap push -4. **Self-healing in `doSync()`** — stages defaults for pre-existing instances on next sync -5. 
**`obol openclaw skills add/remove/list`** — thin wrappers around `kubectl exec -c openclaw ... openclaw skills ...` -6. **`-c openclaw`** in `cliViaKubectlExec()` — explicit container targeting - -### Files created -- `internal/openclaw/resolve.go` -- `internal/openclaw/resolve_test.go` -- `internal/embed/skills/hello/SKILL.md` -- `internal/embed/skills/ethereum/SKILL.md` - -### Files modified -- `internal/embed/embed.go` — skills embed + `CopySkills()` + `GetEmbeddedSkillNames()` -- `internal/openclaw/openclaw.go` — staging, syncing, skill CLI wrappers, `-c openclaw` -- `cmd/obol/openclaw.go` — `ResolveInstance` refactor, `skills` subcommand group - ---- - -## Future Work (Phase 4+) - -| Skill | Priority | Notes | -|-------|----------|-------| -| `obol-wallet` | Nice to have | Web3Signer operations | -| `obol-doctor` | Next release | Stack health diagnostics | -| `obol-tunnel` | Future | Cloudflare tunnel management | -| `obol-deploy` | Future | Deploy apps/networks into the stack | - -When the skill set grows beyond ~10 skills or community contributions start, consider extracting to `github.com/ObolNetwork/openclaw-skills`. diff --git a/plans/skills-system-redesign.md b/plans/skills-system-redesign.md deleted file mode 100644 index d8f40465..00000000 --- a/plans/skills-system-redesign.md +++ /dev/null @@ -1,895 +0,0 @@ -/sc:workflow the ./plans/skills-system-redesign is a concatenation of my notes, and your plans (annotated by me answering your questions). I want you to study both, and take my choices into your implementation. Key things to consider are the refresh of how we do `default` openclaw instances (if we have none, prompt setup, 1 assume its a given, 2+ expect a name mid command you take out and use to route correctly ) in the obol cli. For compile time skills, we will copy them from obol-cli binary to the localhost path that corresponds with the openclaw-gateway's `~/.openclaw/skills`. 
For run time skill addition using the `obol openclaw skill` commands, lets try the approach of `kubectl exec ... ` running the openclaw-cli on the openclaw-gateway container, with the k8s secret auth token loaded etc. ask me any clarifying questions. don't overengineer features if you don't have to, we want the user to feel like they're using stock openclaw. output it as a new refined plan and keep this one. (Maybe do a cleaned version of this as an interim? we need to sort out the disjointed bits and multiple-choice etc) - -_______________ [My notes] _______ -Agent skills in obol openclaw - -Ideas gathering phase: -Local folder, obol-cli command to zip to .tgz and push to config map. Openclaw chart to detect and uncompress. -github.com/ObolNetwork/skills -Openclaw chart pulls these locally in an init script -Openclaw chart has helm sub packages which just contain skill repos? -What’s the advantage? To manage dependencies helm natively? -We create a derivative openclaw dockerfile, and embed skills in the image? -Review opus’s design -Lots of configurability, needs a tl;dr. -The idea of some skills in the cli so it can handle network/github api rate limits is cool. With local ollama someday you could have an offline, skill enabled obol agent. Should the skills just be in the chart though? Need to answer it about constraints -Some skills like using the stack itself may make more sense than the openclaw chart. The skill to use the stack is broader than that application. - - -we should figure out how a helm chart can bundle a set of skills, that other apps can find at runtime. -does the web3signer app expose a config map other namespaces can read? caps us at 1mb for all skills it exports -can they have shared disk across all apps (i.e. create a PV with them on it)? not easily but maybe if all the pvcs mount as read only that would work? -serve them like a webserver and expose a standard service to find them? 
..svc.cluster.local/ -Reloading: “Changes to skills are picked up on the next agent turn when the watcher is enabled.” openclaw hot reloads files on disk -We’ll probably have to make this work for openclaw plugins almost as fast. - -Key note: -__________ -Locations and precedence -Skills are loaded from three places: -Bundled skills: shipped with the install (npm package or OpenClaw.app) -Managed/local skills: ~/.openclaw/skills -Workspace skills: /skills -If a skill name conflicts, precedence is: /skills (highest) → ~/.openclaw/skills → bundled skills (lowest) Additionally, you can configure extra skill folders (lowest precedence) via skills.load.extraDirs in ~/.openclaw/openclaw.json. - -__________ - -Actions: -We should sandbox skills by default maybe? (thats docker in k8s in docker though, so maybe asking for trouble? + routing difficulties to resources in the stack? - -Sandboxed skills + env vars -When a session is sandboxed, skill processes run inside Docker. The sandbox does not inherit the host process.env. Use one of: -agents.defaults.sandbox.docker.env (or per-agent agents.list[].sandbox.docker.env) -bake the env into your custom sandbox image -Global env and skills.entries..env/apiKey apply to host runs only. - - -~/.openclaw/openclaw.json - -{ - skills: { - allowBundled: ["gemini", "peekaboo"], - load: { - extraDirs: ["~/Projects/agent-scripts/skills", "~/Projects/oss/some-skill-pack/skills"], - watch: true, - watchDebounceMs: 250, - }, - install: { - preferBrew: true, - nodeManager: "npm", // npm | pnpm | yarn | bun (Gateway runtime still Node; bun not recommended) - }, - entries: { - "nano-banana-pro": { - enabled: true, - apiKey: "GEMINI_KEY_HERE", - env: { - GEMINI_API_KEY: "GEMINI_KEY_HERE", - }, - }, - peekaboo: { enabled: true }, - sag: { enabled: false }, - }, - }, -} - - - -Conclusion: - -We need to correctly set the openclaw config in our chart, and consider openclaw’s location precedence (above). 
If for example we put popular named skills in high inheritance places, that would put us in charge of the skill. (eth-wingman, etc) -Management commands: -Stick to openclaw standard and map straight into the gateway. -Requires a change to the obol openclaw CLI structure, I think its worth it. -When obol openclaw is called, first, we count how many instances are installed -If none are installed, prompt the user to do obol agent init -If exactly one is installed, assume that is default, pipe the rest of the commands into the openclaw cli (temporary pod, or the on-host way we have now). It needs to be able to speak to the openclaw gateway. -It needs to be coming from an IP that openclaw will accept for security reasons. -[I guess this depends on what part of the code writes the skill files. If its the CLI, then these files would appear on the host, and we’d be back to packaging them like i would like to avoid.] -1. We could exec on the openclaw container itself and do everything local to the container runtime, that should sort auth and folder writing perms? -2. Plan b, we could on the host write to: $HOME/.config/obol/applications/obol/openclaw/playful-rabbit/.openclaw/skills/ and rely on openclaw’s hot reload behaviour -If more than one instance is installed, then we have to interpret the next word of command input as a petname, use it to decide the host path to write the skill to, or the correct gateway to kubectl exec on, before giving the remainder to a correctly configured openclaw cli (if needed) - -Pre-installed skills: -We need: -[Must have] Ethereum Network (erpc) -[Nice to have] Ethereum Wallet (web3signer) -[Next Release] Obol Doctor (debugs the obol stack) (also will be an obol doctor) -[A plan for a skill to sensibly deploy stuff into the stack] -Open question: -How does LiteLLM handle the idea of telling llms about tools they can opt to use? Is it just dealt with at the openclaw layer and not a concern of the llm? 
-Same problem as always, how do we get this folder structure on the disk for the openclaw instance. -We can start with: -Embedding the skills in obol-cli (golang) and copying them to the appropriate host directory upon creation/import of a new openclaw instance -Import should be cognizant of folder conficts in case this is an imported openclaw. -Will we have an obol/ namespace in ~/.openclaw/skills? is that standard? - -Nice to haves / hardening: -We should see if we can get the sandboxed skills feature to work -Let people turn off telemetry? (for now we want plenty though to show adoption). - - - -Telemetry (install counts) -When you run clawhub sync while logged in, the CLI sends a minimal snapshot to compute install counts. You can disable this entirely: -export CLAWHUB_DISABLE_TELEMETRY=1 - - - - -https://docs.openclaw.ai/tools/skills-config#skills-config - - -_______________ [Opus 4.6] _______ - -# Skills System Redesign: Remote Skills & Default Skill Packs - -## Problem Statement - -OpenClaw skills are currently filesystem-based directories with a `SKILL.md` file. The only way to get custom skills into a deployed OpenClaw instance is: - -1. Have them locally on disk -2. Run `obol openclaw skills sync --from ` to package them into a ConfigMap -3. Run `obol openclaw sync ` to apply - -There is no way to: -- Pull skills from public GitHub repositories -- Share skills across the community -- Ship default Obol-specific skills with the stack -- Manage skill dependencies or versions -- Add/remove individual skills without replacing the entire set - -## Goals - -1. **`npx`-like skill fetching** - `obol openclaw skills add github.com/org/repo` pulls skills from GitHub -2. **Default Obol skill pack** - ships useful Obol Stack skills out of the box (k8s ops, network management, monitoring) -3. **Skill registry resolution** - shorthand like `@obol/ethereum` resolves to a known GitHub location -4. 
**Declarative skill management** - skills can be listed in config for reproducible setups -5. **Backward compatible** - existing `skills sync --from ` still works - -## Current Architecture - -### How OpenClaw Loads Skills (npm package internals) - -``` -Load precedence (highest → lowest): -1. /skills/ — per-agent workspace skills -2. ~/.openclaw/skills/ — managed/local skills -3. Bundled skills (npm package) — 40+ built-in skills -4. skills.load.extraDirs — additional paths from openclaw.json -``` - -Each skill is a directory containing `SKILL.md` with YAML frontmatter: - -```markdown ---- -name: my-skill -description: What it does -metadata: - openclaw: - requires: - bins: ["kubectl"] - env: ["KUBECONFIG"] ---- - -# Agent instructions for using this skill... -``` - -### How Obol Stack Delivers Skills Today - -``` -obol openclaw skills sync --from - │ - ├─ tar -czf skills.tgz -C . - ├─ kubectl delete configmap openclaw--skills (if exists) - ├─ kubectl create configmap openclaw--skills --from-file=skills.tgz= - └─ prints "To apply, re-sync: obol openclaw sync " -``` - -The Helm chart (remote `obol/openclaw v0.1.3`) mounts this ConfigMap and extracts it into the pod's skills directory. - -### Overlay Values (current) - -```yaml -skills: - enabled: true - createDefault: true # chart creates empty ConfigMap placeholder -``` - -### Key Constraints - -- The Helm chart is **remote** (`obol/openclaw` from `obolnetwork.github.io/helm-charts/`), not in this repo ANSWER: You can update this chart, its adjacent to you in ../helm-charts. 
-- Skills ConfigMap has a **1MB limit** (etcd object size limit) — fine for text-based SKILL.md files but limits total skill count ANSWER: lets modify folders on localhost, which are mapped straight into the pods PVs, and openclaw runs a file watcher so it will just detect and reload -- The pod needs skills at filesystem paths — whatever we do must end up as files in the container -- OpenClaw's `skills.load.extraDirs` config and `skills.entries` per-skill config are available levers. ANSWER: and knowing the right host path to write to to end up at ~/.openclaw/skills - ---- - -## Proposed Design - -### Architecture Overview - -``` - ┌─────────────────────────────┐ - │ GitHub / Git Repositories │ - │ │ - │ github.com/ObolNetwork/ │ - │ openclaw-skills/ │ - │ github.com/user/ │ - │ my-custom-skill/ │ - └──────────┬──────────────────┘ - │ - ┌───────────────────┼───────────────────┐ - │ │ │ - ┌─────▼─────┐ ┌───────▼───────┐ ┌──────▼──────┐ - │ CLI Fetch │ │ Init Container│ │ Declarative│ - │ (dev UX) │ │ (GitOps) │ │ Config │ - └─────┬─────┘ └───────┬───────┘ └──────┬──────┘ - │ │ │ - ▼ ▼ ▼ - ┌──────────────────────────────────────────────────┐ - │ Local Skills Directory │ - │ $CONFIG_DIR/applications/openclaw//skills/ │ - │ │ - │ ├── @obol/ │ - │ │ ├── kubernetes/SKILL.md │ - │ │ ├── ethereum/SKILL.md │ - │ │ └── monitoring/SKILL.md │ - │ ├── @user/ │ - │ │ └── custom-skill/SKILL.md │ - │ └── skills.lock.json │ - └──────────────────┬───────────────────────────────┘ - │ - │ obol openclaw skills sync - │ (tar → ConfigMap → helmfile sync) - ▼ - ┌──────────────────┐ - │ OpenClaw Pod │ - │ /skills/ mount │ - └──────────────────┘ -``` - -### Component 1: Skill Source Resolution (`internal/openclaw/skills/`) - -A new `skills` subpackage that handles fetching skills from various sources. 
- -#### Source Types - -```go -// SkillSource represents a fetchable skill location -type SkillSource struct { - Type string // "github", "local", "builtin" - Owner string // GitHub org/user - Repo string // Repository name - Path string // Subdirectory within repo (optional) - Ref string // Git ref: tag, branch, commit (default: HEAD) - Alias string // Local name override -} -``` - -#### Resolution Rules - -| Input | Resolves To | -|-------|-------------| -| `@obol/kubernetes` | `github.com/ObolNetwork/openclaw-skills/skills/kubernetes@latest` | -| `@obol/ethereum` | `github.com/ObolNetwork/openclaw-skills/skills/ethereum@latest` | -| `github.com/user/repo` | Clone entire repo, find all `SKILL.md` files | -| `github.com/user/repo/path/to/skill` | Clone repo, use specific subdirectory | -| `github.com/user/repo@v1.2.0` | Clone at specific tag | -| `./local/path` | Copy from local filesystem (existing behavior) | - -#### Registry File - -A simple JSON registry embedded in the obol CLI binary that maps shorthand names to GitHub sources: - -```go -//go:embed skills-registry.json -var defaultRegistry []byte -``` - -```json -{ - "version": 1, - "prefix": "@obol", - "repository": "github.com/ObolNetwork/openclaw-skills", - "skills": { - "kubernetes": { - "path": "skills/kubernetes", - "description": "Kubernetes cluster operations via kubectl", - "requires": { "bins": ["kubectl"] } - }, - "ethereum": { - "path": "skills/ethereum", - "description": "Ethereum node management and monitoring", - "requires": { "bins": ["kubectl"] } - }, - "monitoring": { - "path": "skills/monitoring", - "description": "Prometheus/Grafana monitoring operations" - }, - "network-ops": { - "path": "skills/network-ops", - "description": "Obol network install/sync/delete operations" - }, - "tunnel": { - "path": "skills/tunnel", - "description": "Cloudflare tunnel management" - } - } -} -``` - -### Component 2: CLI Commands (`cmd/obol/openclaw.go`) - -Expand the `skills` subcommand group: - -``` 
-obol openclaw skills -├── add [--ref ] # Fetch skill(s) from GitHub or local path -├── remove # Remove an installed skill -├── list [--remote] # List installed skills (or available @obol skills) -├── sync # Push local skills dir → ConfigMap → pod -├── update [|--all] # Update skill(s) to latest version -└── init [--defaults] # Initialize skills dir with default Obol pack -``` - -#### `obol openclaw skills add` — the npx-like command - -```bash -# Add from the Obol registry (shorthand) -obol openclaw skills add @obol/kubernetes -obol openclaw skills add @obol/ethereum @obol/monitoring - -# Add from any public GitHub repo -obol openclaw skills add github.com/someuser/cool-skill -obol openclaw skills add github.com/someuser/skill-pack/skills/specific-one - -# Add from GitHub with version pinning -obol openclaw skills add github.com/someuser/cool-skill@v2.0.0 - -# Add from local directory (replaces old --from behavior) -obol openclaw skills add ./my-local-skills/custom-skill - -# Add all default Obol skills -obol openclaw skills add @obol/defaults -``` - -**Flow:** - -``` -obol openclaw skills add @obol/kubernetes - │ - ├─ Resolve "@obol/kubernetes" → github.com/ObolNetwork/openclaw-skills/skills/kubernetes - ├─ Sparse checkout (or GitHub API tarball) of just that path - ├─ Validate: SKILL.md exists with valid frontmatter - ├─ Copy to: $CONFIG_DIR/applications/openclaw//skills/@obol/kubernetes/ - ├─ Update skills.lock.json with source, ref, commit SHA - ├─ Print: "✓ Added @obol/kubernetes" - └─ Print: "Run 'obol openclaw skills sync ' to deploy" -``` - -#### `obol openclaw skills init` — bootstrap with defaults - -```bash -# Initialize with the default Obol skill pack -obol openclaw skills init default --defaults - -# This is equivalent to: -obol openclaw skills add @obol/defaults -obol openclaw skills sync default -``` - -#### `obol openclaw skills list` - -```bash -$ obol openclaw skills list default -Installed skills for openclaw/default: - - @obol/kubernetes 
Kubernetes cluster operations v1.0.0 (up to date) - @obol/ethereum Ethereum node management v1.0.0 (up to date) - @obol/monitoring Prometheus/Grafana operations v1.0.0 (update: v1.1.0) - custom-skill My custom skill from local local - -Total: 4 skill(s) - -$ obol openclaw skills list --remote -Available skills from @obol registry: - - @obol/kubernetes Kubernetes cluster operations via kubectl - @obol/ethereum Ethereum node management and monitoring - @obol/monitoring Prometheus/Grafana monitoring operations - @obol/network-ops Obol network install/sync/delete operations - @obol/tunnel Cloudflare tunnel management -``` - -### Component 3: Skills Lock File - -Track installed skills and their versions for reproducibility: - -```json -{ - "version": 1, - "skills": { - "@obol/kubernetes": { - "source": "github.com/ObolNetwork/openclaw-skills", - "path": "skills/kubernetes", - "ref": "v1.0.0", - "commit": "abc123def456", - "installed": "2026-02-18T12:00:00Z" - }, - "@obol/ethereum": { - "source": "github.com/ObolNetwork/openclaw-skills", - "path": "skills/ethereum", - "ref": "v1.0.0", - "commit": "abc123def456", - "installed": "2026-02-18T12:00:00Z" - }, - "custom-skill": { - "source": "local", - "path": "/Users/dev/my-skills/custom-skill", - "installed": "2026-02-18T14:00:00Z" - } - } -} -``` - -### Component 4: GitHub Fetching Strategy - -Two approaches, use **GitHub API tarball** as primary (no git dependency): - -```go -// Primary: GitHub API tarball download (no git required) -func fetchFromGitHub(owner, repo, path, ref string) (string, error) { - // GET https://api.github.com/repos/{owner}/{repo}/tarball/{ref} - // Extract only the files under {path}/ - // Return path to extracted directory -} - -// Fallback: git sparse-checkout (for private repos or rate limiting) -func fetchViaGit(repoURL, path, ref string) (string, error) { - // git clone --depth 1 --filter=blob:none --sparse - // git sparse-checkout set - // Return path to checked out directory -} -``` - 
-**Rate limiting**: GitHub API allows 60 requests/hour unauthenticated, 5000 with a token. For the `add` command this is fine (one request per skill add). Support `GITHUB_TOKEN` env var for authenticated requests. - -### Component 5: Default Skills in Onboard Flow - -Modify `Onboard()` to optionally install default skills: - -```go -// In Onboard(), after writing overlay and helmfile: -if opts.Sync { - // Install default Obol skills if skills dir is empty - skillsDir := filepath.Join(deploymentDir, "skills") - if _, err := os.Stat(skillsDir); os.IsNotExist(err) { - fmt.Println("Installing default Obol skills...") - if err := installDefaultSkills(skillsDir); err != nil { - fmt.Printf("Warning: could not install default skills: %v\n", err) - // Non-fatal — continue with deployment - } - } - // Skills sync happens as part of doSync -} -``` - -The default skills should be fetched from `@obol/defaults` (which maps to a curated list). If network is unavailable, fall back to a minimal embedded skill set. - -### Component 6: Embedded Fallback Skills - -For air-gapped or offline scenarios, embed a minimal set of skills directly in the CLI binary: - -```go -//go:embed skills/kubernetes/SKILL.md -//go:embed skills/network-ops/SKILL.md -var embeddedSkills embed.FS -``` - -These serve as a fallback when GitHub is unreachable during `skills init --defaults`. - -### Component 7: Overlay Values Enhancement - -Update `generateOverlayValues()` to support skill configuration in the Helm values: - -```yaml -skills: - enabled: true - createDefault: true - # NEW: Configure per-skill settings via overlay - entries: - kubernetes: - enabled: true - ethereum: - enabled: true - env: - ETHEREUM_NETWORK: "mainnet" - monitoring: - enabled: true -``` - -This maps to OpenClaw's `skills.entries` configuration, giving operators control over which skills are active and their per-skill environment. 
- -### Component 8: Automatic Skills Sync on Deploy - -Modify `doSync()` to automatically package and push skills if the local skills directory exists: - -```go -func doSync(cfg *config.Config, id string) error { - deploymentDir := deploymentPath(cfg, id) - - // Auto-sync skills if local skills directory exists - skillsDir := filepath.Join(deploymentDir, "skills") - if info, err := os.Stat(skillsDir); err == nil && info.IsDir() { - entries, _ := os.ReadDir(skillsDir) - // Only sync if there are actual skill directories (not just lock file) - hasSkills := false - for _, e := range entries { - if e.IsDir() { - hasSkills = true - break - } - } - if hasSkills { - fmt.Println("Syncing skills to cluster...") - if err := SkillsSync(cfg, id, skillsDir); err != nil { - fmt.Printf("Warning: skills sync failed: %v\n", err) - } - } - } - - // ... existing helmfile sync logic -} -``` - -This removes the two-step manual process. Adding a skill and syncing the deployment automatically picks it up. - ---- - -## Proposed Obol Default Skills - -These would live in `github.com/ObolNetwork/openclaw-skills`: - -### `@obol/kubernetes` - -```markdown ---- -name: kubernetes -description: Kubernetes cluster operations for the Obol Stack -metadata: - openclaw: - requires: - bins: ["kubectl"] - env: ["KUBECONFIG"] ---- - -# Kubernetes Operations - -You have access to kubectl configured for the Obol Stack k3d cluster. 
- -## Capabilities -- List, describe, and inspect pods, services, deployments across all namespaces -- View pod logs and events -- Check resource usage and node status -- Debug failing pods (describe, logs, events) - -## Conventions -- The stack uses k3d with namespaces per deployment -- Network deployments: `ethereum-`, `helios-`, `aztec-` -- Infrastructure: `traefik`, `erpc`, `monitoring`, `llm`, `obol-frontend` -- Use `kubectl get all -n ` for namespace overview -``` - -### `@obol/ethereum` - -```markdown ---- -name: ethereum -description: Ethereum node management and monitoring -metadata: - openclaw: - requires: - bins: ["kubectl"] ---- - -# Ethereum Node Management - -Manage Ethereum network deployments in the Obol Stack. - -## Capabilities -- Monitor execution and beacon client sync status -- Check peer counts and network connectivity -- View client logs for debugging -- Monitor disk usage and resource consumption -- Check chain head and sync progress - -## Common Operations -- Sync status: `kubectl -n ethereum- logs deploy/execution -f` -- Beacon status: `kubectl -n ethereum- logs deploy/beacon -f` -- Resource usage: `kubectl -n ethereum- top pods` -``` - -### `@obol/monitoring` - -```markdown ---- -name: monitoring -description: Prometheus and Grafana monitoring operations -metadata: - openclaw: - requires: - bins: ["kubectl"] ---- - -# Monitoring Operations - -Access Prometheus metrics and Grafana dashboards for the Obol Stack. - -## Capabilities -- Query Prometheus for metrics -- Check alerting rules and firing alerts -- Monitor resource usage trends -- Access Grafana dashboards -``` - -### `@obol/network-ops` - -```markdown ---- -name: network-ops -description: Obol network deployment lifecycle operations -metadata: - openclaw: - requires: - bins: ["kubectl"] ---- - -# Network Operations - -Manage the full lifecycle of blockchain network deployments. 
- -## Capabilities -- List installed network deployments -- Check deployment health and sync status -- Monitor resource consumption per deployment -- Assist with network configuration decisions -``` - -### `@obol/tunnel` - -```markdown ---- -name: tunnel -description: Cloudflare tunnel management for public access -metadata: - openclaw: - requires: - bins: ["kubectl"] ---- - -# Tunnel Management - -Manage Cloudflare tunnels for exposing Obol Stack services publicly. - -## Capabilities -- Check tunnel status and connectivity -- View tunnel logs for debugging -- Monitor tunnel routes and DNS configuration -``` - ---- - -## Implementation Phases - -### Phase 1: Core Skill Fetching (MVP) - -**Files to create/modify:** - -| File | Action | Description | -|------|--------|-------------| -| `internal/openclaw/skills/resolve.go` | Create | Source resolution (GitHub URL parsing, @obol shorthand) | -| `internal/openclaw/skills/fetch.go` | Create | GitHub tarball download + extraction | -| `internal/openclaw/skills/lock.go` | Create | Lock file read/write | -| `internal/openclaw/skills/registry.go` | Create | Embedded registry loading | -| `internal/openclaw/skills/skills-registry.json` | Create | Default @obol skill registry | -| `cmd/obol/openclaw.go` | Modify | Add `skills add`, `skills remove`, `skills list`, `skills update` subcommands | -| `internal/openclaw/openclaw.go` | Modify | Update `SkillsSync` to work with new skills dir layout | - -**Deliverables:** -- `obol openclaw skills add ` works with GitHub URLs and @obol shorthand -- `obol openclaw skills remove ` removes a skill -- `obol openclaw skills list` shows installed skills -- Lock file tracks installed skills -- Existing `skills sync --from` still works - -### Phase 2: Default Skills & Auto-Install - -**Files to create/modify:** - -| File | Action | Description | -|------|--------|-------------| -| `internal/openclaw/skills/defaults.go` | Create | Default skill installation logic | -| 
`internal/openclaw/skills/embedded/` | Create | Minimal embedded fallback skills | -| `internal/openclaw/openclaw.go` | Modify | Wire default skills into `Onboard()` flow | -| `internal/openclaw/openclaw.go` | Modify | Auto-sync skills in `doSync()` | - -**Deliverables:** -- `obol openclaw skills init --defaults` bootstraps default skills -- `Onboard()` installs defaults on first deploy (with network fallback to embedded) -- `doSync()` automatically packages skills if present -- No more two-step manual skills sync - -### Phase 3: Skill Pack Repository - -**External repository:** `github.com/ObolNetwork/openclaw-skills` - -| Path | Description | -|------|-------------| -| `skills/kubernetes/SKILL.md` | K8s cluster operations | -| `skills/ethereum/SKILL.md` | Ethereum node management | -| `skills/monitoring/SKILL.md` | Prometheus/Grafana ops | -| `skills/network-ops/SKILL.md` | Network lifecycle management | -| `skills/tunnel/SKILL.md` | Cloudflare tunnel management | -| `README.md` | Contributing guide for community skills | - -**Deliverables:** -- Public repo with curated Obol skills -- CI validation that all skills have valid SKILL.md frontmatter -- Tagged releases for version pinning - -### Phase 4: Helm Chart Integration (Upstream) - -Changes to the **remote** `obol/openclaw` Helm chart (separate repo): - -- Support `skills.sources` in values for declarative skill fetching via init container -- Init container that can `git clone` or download skills from configured sources -- This enables GitOps workflows where skills are declared in values, not manually pushed - -```yaml -# Future values-obol.yaml -skills: - enabled: true - sources: - - name: obol-defaults - repo: github.com/ObolNetwork/openclaw-skills - ref: v1.0.0 - path: skills/ - - name: custom - repo: github.com/myorg/my-skills - ref: main - entries: - kubernetes: - enabled: true - ethereum: - enabled: true -``` - -This phase requires coordination with the upstream openclaw Helm chart maintainers. 
- ---- - -## Directory Layout (Post-Implementation) - -``` -$CONFIG_DIR/applications/openclaw// -├── values-obol.yaml -├── helmfile.yaml -├── values-obol.secrets.json -└── skills/ # NEW: managed skills directory - ├── skills.lock.json # Tracks sources, versions, commits - ├── @obol/ # Namespaced by source - │ ├── kubernetes/ - │ │ └── SKILL.md - │ ├── ethereum/ - │ │ └── SKILL.md - │ ├── monitoring/ - │ │ └── SKILL.md - │ ├── network-ops/ - │ │ └── SKILL.md - │ └── tunnel/ - │ └── SKILL.md - └── @someuser/ # Community skills - └── custom-skill/ - └── SKILL.md -``` - ---- - -## CLI UX Examples - -### First-time setup with defaults - -```bash -$ obol agent init -Generated deployment ID: default - ✓ Ollama detected at http://localhost:11434 - -✓ OpenClaw instance configured! - Installing default Obol skills... - ✓ Added @obol/kubernetes - ✓ Added @obol/ethereum - ✓ Added @obol/monitoring - ✓ Added @obol/network-ops - ✓ Added @obol/tunnel - -Deploying to cluster... - Syncing skills to cluster... - ✓ Skills ConfigMap updated: openclaw-default-skills - Running helmfile sync... - -✓ OpenClaw installed with 5 default skills! -``` - -### Adding a community skill - -```bash -$ obol openclaw skills add github.com/ethbuilder/validator-skill -Fetching github.com/ethbuilder/validator-skill... - ✓ Found valid SKILL.md (name: validator-ops, description: Ethereum validator management) - ✓ Added to skills/ethbuilder/validator-ops/ - -Run 'obol openclaw skills sync default' to deploy - -$ obol openclaw skills sync default -Syncing skills to cluster... - ✓ Skills ConfigMap updated: openclaw-default-skills - Running helmfile sync... -✓ Skills deployed (6 skills) -``` - -### Updating skills - -```bash -$ obol openclaw skills update --all -Checking for updates... - @obol/kubernetes v1.0.0 → v1.1.0 (updated) - @obol/ethereum v1.0.0 (up to date) - @obol/monitoring v1.0.0 → v1.0.1 (updated) - @obol/network-ops v1.0.0 (up to date) - @obol/tunnel v1.0.0 (up to date) - -Updated 2 skill(s). 
Run 'obol openclaw skills sync default' to deploy. -``` - ---- - -## Open Questions - -1. **ConfigMap size limit**: With many skills, we may hit the 1MB etcd limit. Should we split into multiple ConfigMaps or use a PVC-based approach for large skill sets? - -2. **Skill dependencies**: Should skills be able to declare dependencies on other skills? (e.g., `@obol/ethereum` depends on `@obol/kubernetes`). Adds complexity but prevents broken skill chains. - -3. **Private repository support**: Should we support `GITHUB_TOKEN` for private repos from day one, or add it later? The fetch code should accept it but the UX can wait. - -4. **Helm chart init container (Phase 4)**: This requires upstream chart changes. Should we propose the chart changes early and develop in parallel, or wait until the CLI-side is proven? - -5. **Skill validation**: Should `skills add` validate that the skill's `requires.bins` are available in the target pod image, or just warn? Strict validation prevents broken skills but may be too rigid. - -6. **Community skill registry**: Beyond `@obol/` shorthand, should there be a discoverable registry (like npm) for community skills? Or is GitHub search + convention (`openclaw-skill-*` repos) sufficient? 
- ---- - -## Risk Assessment - -| Risk | Probability | Impact | Mitigation | -|------|-------------|--------|------------| -| GitHub API rate limiting | Medium | Low | Support `GITHUB_TOKEN`, cache downloads, embedded fallback | -| ConfigMap size limit | Low | Medium | Monitor archive sizes, split if needed, document limits | -| Upstream chart incompatibility | Low | High | Test against pinned chart version, coordinate with chart maintainers | -| Stale/broken community skills | Medium | Low | Validation on `skills add`, clear error messages, `skills check` command | -| Network unavailable during init | Medium | Medium | Embedded fallback skills, graceful degradation | - ---- - -## Success Criteria - -- [ ] `obol openclaw skills add @obol/kubernetes` fetches and installs the skill in <5 seconds -- [ ] `obol agent init` installs default skills automatically on first deploy -- [ ] `obol openclaw skills list` shows all installed skills with version info -- [ ] Community skills from arbitrary GitHub repos work without special configuration -- [ ] Existing `skills sync --from ` workflow continues to work unchanged -- [ ] Default Obol skills provide meaningful agent capabilities for stack operations -- [ ] Skills survive pod restarts (ConfigMap-backed persistence) -- [ ] Lock file enables reproducible skill sets across environments - - diff --git a/plans/terminal-ux-improvement.md b/plans/terminal-ux-improvement.md deleted file mode 100644 index a331e7af..00000000 --- a/plans/terminal-ux-improvement.md +++ /dev/null @@ -1,135 +0,0 @@ -# Plan: Obol Stack CLI Terminal UX Improvement - -## Context - -The obol CLI (`cmd/obol`) and the bootstrap installer (`obolup.sh`) had inconsistent terminal output styles. obolup.sh had a clean visual language (colored `==>`, `✓`, `!`, `✗` prefixes, suppressed subprocess output), while the Go CLI used raw `fmt.Println` with no colors, no spinners, and direct subprocess passthrough that flooded the terminal with helmfile/k3d/kubectl output. 
Invalid commands produced poor error messages with no suggestions. - -**Goal**: Unify the visual language across both tools, capture subprocess output behind spinners, and add `--verbose`/`--quiet` flags for different user needs. - -**Decision**: User chose "Capture + spinner" for subprocess handling and Charmbracelet lipgloss as the styling library. - -## What Was Built - -### New Package: `internal/ui/` (7 files) - -| File | Exports | Purpose | -|------|---------|---------| -| `ui.go` | `UI` struct, `New(verbose)`, `NewWithOptions(verbose, quiet)` | Core type with TTY detection, verbose/quiet flags | -| `output.go` | `Info`, `Success`, `Warn`, `Error`, `Print`, `Printf`, `Detail`, `Dim`, `Bold`, `Blank` | Colored message functions matching obolup.sh's `log_*` style. Quiet mode suppresses all except Error/Warn. | -| `exec.go` | `Exec(ExecConfig)`, `ExecOutput(ExecConfig)` | Subprocess capture: spinner by default, streams with `--verbose`, dumps captured output on error | -| `spinner.go` | `RunWithSpinner(msg, fn)` | Braille spinner (`⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏`) — minimal goroutine impl, no bubbletea | -| `prompt.go` | `Confirm`, `Select`, `Input`, `SecretInput` | Thin wrappers around `bufio.Reader` with lipgloss formatting | -| `errors.go` | `FormatError`, `FormatActionableError` | Structured error display with hints and next-step commands | -| `suggest.go` | `SuggestCommand`, `findSimilarCommands` | Levenshtein distance for "did you mean?" on unknown commands | - -### Output Style (unified across both tools) - -``` -==> Starting cluster... (blue, top-level header — no indent) - ✓ Cluster created (green, subordinate result — 2-space indent) - ! DNS config skipped (yellow, warning — 2-space indent) -✗ Helmfile sync failed (red, error — no indent) -``` - -### Subprocess Capture Pattern - -- **Default** (TTY, not verbose): Spinner + buffer. Success → ` ✓ msg (Xs)`. Failure → `✗ msg` + dump captured output. 
-- **`--verbose`**: Stream subprocess output live, each line prefixed with dim ` │ `. -- **Non-TTY** (pipe/CI): Plain text, no spinner, live stream. -- **Exception**: Passthrough commands (`obol kubectl`, `obol helm`, `obol k9s`, `obol openclaw cli`) keep direct stdin/stdout piping. - -### Global Flags - -| Flag | Env Var | Effect | -|------|---------|--------| -| `--verbose` | `OBOL_VERBOSE=1` | Stream subprocess output live with `│` prefix | -| `--quiet` / `-q` | `OBOL_QUIET=1` | Suppress all output except errors and warnings | - -### CLI Improvements - -- **Colored errors**: `log.Fatal(err)` replaced with `✗ error message` (red) -- **"Did you mean?"**: Levenshtein-based command suggestions on typos (`obol netwerk` → "Did you mean? obol network") -- **Interactive prompts**: `obol model setup` uses styled select menu + hidden API key input via `ui.SecretInput` - -## Phased Rollout (as executed) - -### Phase 1: Foundation -Created `internal/ui/` package (7 files), added lipgloss dependency, wired `--verbose` flag, `Before` hook, `CommandNotFound` handler, replaced `log.Fatal` with colored error output. - -**Files created**: `internal/ui/*.go` -**Files modified**: `go.mod`, `cmd/obol/main.go` - -### Phase 2: Stack Lifecycle (highest impact) -Migrated `stack init/up/down/purge` — the noisiest commands. Added `*ui.UI` to `Backend` interface. Converted ~8 subprocess passthrough sites to `u.Exec()`. `waitForAPIServer` and polling loops wrapped in spinners. - -**Files modified**: `internal/stack/stack.go`, `internal/stack/backend.go`, `internal/stack/backend_k3d.go`, `internal/stack/backend_k3s.go`, `internal/stack/backend_test.go`, `internal/stack/stack_test.go`, `cmd/obol/bootstrap.go`, `cmd/obol/main.go` - -### Phase 3: Network + OpenClaw + App + Agent -Migrated network install/sync/delete, openclaw onboard/sync/setup/delete/skills, app install/sync/delete, and agent init. Cascaded `*ui.UI` through all call chains. Converted confirmation prompts to `u.Confirm()`. 
- -**Files modified**: `internal/network/network.go`, `internal/openclaw/openclaw.go`, `internal/openclaw/skills_injection_test.go`, `internal/app/app.go`, `internal/agent/agent.go`, `cmd/obol/network.go`, `cmd/obol/openclaw.go`, `cmd/obol/main.go` - -### Phase 4: Update, Tunnel, Model -Migrated remaining internal packages. `update.ApplyUpgrades` helmfile sync captured. All tunnel operations use `u.Exec()` (except interactive `cloudflared login` and `logs -f`). `model.ConfigureLLMSpy` status messages styled. - -**Files modified**: `internal/update/update.go`, `internal/tunnel/tunnel.go`, `internal/tunnel/login.go`, `internal/tunnel/provision.go`, `internal/model/model.go`, `cmd/obol/update.go`, `cmd/obol/model.go`, `cmd/obol/main.go` - -### Phase 5: Polish -Added `--quiet` / `-q` global flag with `OBOL_QUIET` env var. Quiet mode suppresses all output except errors/warnings. Migrated `obol model setup` interactive prompt to use `ui.Select()` + `ui.SecretInput()`. Fixed `cmd/obol/update.go` to use `getUI(c)` instead of `ui.New(false)`. - -**Files modified**: `internal/ui/ui.go`, `internal/ui/output.go`, `cmd/obol/main.go`, `cmd/obol/update.go`, `cmd/obol/model.go` - -### Phase 6: obolup.sh Alignment -Aligned the bash installer's output to match the Go CLI's visual hierarchy: -- `log_success`/`log_warn` gained 2-space indent (subordinate to `log_info`) -- Banner replaced from Unicode box (`╔═══╗`) to ASCII art logo (matches `obol --help`) -- Added `log_dim()` function and `DIM`/`BOLD` ANSI codes -- Instruction blocks indented consistently (2-space for text, 4-space for commands) - -**Files modified**: `obolup.sh` - -## Dependencies Added - -``` -github.com/charmbracelet/lipgloss — styles, colors, NO_COLOR support, TTY degradation -``` - -Transitive: `muesli/termenv`, `lucasb-eyer/go-colorful`, `mattn/go-runewidth`, `rivo/uniseg`, `xo/terminfo`. `mattn/go-isatty` was already an indirect dep. 
- -## Files Inventory - -**New files (7)**: -- `internal/ui/ui.go` -- `internal/ui/output.go` -- `internal/ui/exec.go` -- `internal/ui/spinner.go` -- `internal/ui/prompt.go` -- `internal/ui/errors.go` -- `internal/ui/suggest.go` - -**Modified Go files (~25)**: -- `go.mod`, `go.sum` -- `cmd/obol/main.go`, `cmd/obol/bootstrap.go`, `cmd/obol/network.go`, `cmd/obol/openclaw.go`, `cmd/obol/model.go`, `cmd/obol/update.go` -- `internal/stack/stack.go`, `internal/stack/backend.go`, `internal/stack/backend_k3d.go`, `internal/stack/backend_k3s.go` -- `internal/network/network.go` -- `internal/openclaw/openclaw.go` -- `internal/app/app.go` -- `internal/agent/agent.go` -- `internal/update/update.go` -- `internal/tunnel/tunnel.go`, `internal/tunnel/login.go`, `internal/tunnel/provision.go` -- `internal/model/model.go` -- `internal/stack/backend_test.go`, `internal/stack/stack_test.go`, `internal/openclaw/skills_injection_test.go` - -**Modified shell (1)**: -- `obolup.sh` - -## Verification - -1. `go build ./...` — compiles clean -2. `go vet ./...` — no issues -3. `go test ./...` — all 7 test packages pass -4. `bash -n obolup.sh` — syntax valid -5. `obol netwerk` — shows "Did you mean? obol network" -6. `obol --quiet network list` — suppresses output -7. `obol network list` — shows colored output with bold headers -8. `obol app install` — shows colored `✗` error with examples