diff --git a/.agents/skills/obol-stack-dev/references/llm-routing.md b/.agents/skills/obol-stack-dev/references/llm-routing.md index 49e014d1..2e7c04d8 100644 --- a/.agents/skills/obol-stack-dev/references/llm-routing.md +++ b/.agents/skills/obol-stack-dev/references/llm-routing.md @@ -53,7 +53,6 @@ obol model remove qwen3.5:9b obol model remove qwen3.5:4b obol model setup custom \ - --name spark1-vllm \ --endpoint http://192.168.18.23:8000/v1 \ --model qwen36-deep # `setup custom` validates the endpoint, patches LiteLLM, and internally calls @@ -64,7 +63,7 @@ obol model list # confirm the custom entry is the only local model obol model status # provider state ``` -The flow scripts (`flows/lib.sh::route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_NAME` / `OBOL_LLM_API_KEY` env vars so smoke tests target a GPU host without burning host CPU on local Ollama. +The flow scripts (`flows/lib.sh::route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_API_KEY` env vars so smoke tests target a GPU host without burning host CPU on local Ollama. ## Paid Routing (`paid/`) diff --git a/CLAUDE.md b/CLAUDE.md index 04a71396..447496ea 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -244,7 +244,6 @@ obol model remove qwen3.5:9b obol model remove qwen3.5:4b obol model setup custom \ - --name spark1-vllm \ --endpoint http://192.168.18.23:8000/v1 \ --model qwen36-deep # `setup custom` validates the endpoint, patches LiteLLM, and internally calls @@ -259,7 +258,7 @@ obol model list # confirm head of obol model status # show provider state ``` -The flow scripts (`flows/lib.sh:route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_NAME` / `OBOL_LLM_API_KEY` env vars, so smoke tests can target a GPU host without burning host CPU on local Ollama. +The flow scripts (`flows/lib.sh:route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_API_KEY` env vars, so smoke tests can target a GPU host without burning host CPU on local Ollama. **Per-instance overlay**: `buildLiteLLMRoutedOverlay()` reuses "ollama" provider slot pointing at `litellm.llm.svc:4000/v1` with `api: openai-completions`. App → litellm:4000 → routes by model name → actual API. diff --git a/cmd/obol/model.go b/cmd/obol/model.go index f7c402e8..c47568ee 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -264,7 +264,6 @@ func modelSetupCustomCommand(cfg *config.Config) *cli.Command { Name: "custom", Usage: "Add a custom OpenAI-compatible endpoint (validates before adding)", Flags: []cli.Flag{ - &cli.StringFlag{Name: "name", Usage: "Short label for the endpoint (informational only — LiteLLM keys the route by --model, not --name)", Required: true}, &cli.StringFlag{Name: "endpoint", Usage: "Full base URL (e.g. http://host:8000/v1)", Required: true}, &cli.StringFlag{Name: "model", Usage: "Model identifier at the endpoint — this is also the LiteLLM model_name the agent will call", Required: true}, &cli.StringFlag{Name: "api-key", Usage: "API key (optional, some endpoints don't require it)"}, @@ -272,12 +271,11 @@ func modelSetupCustomCommand(cfg *config.Config) *cli.Command { }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) - name := cmd.String("name") endpoint := cmd.String("endpoint") modelName := cmd.String("model") apiKey := cmd.String("api-key") - if err := model.AddCustomEndpoint(cfg, u, name, endpoint, modelName, apiKey); err != nil { + if err := model.AddCustomEndpoint(cfg, u, endpoint, modelName, apiKey); err != nil { return err } diff --git a/flows/buy-external.sh b/flows/buy-external.sh index a386312e..705ad26d 100755 --- a/flows/buy-external.sh +++ b/flows/buy-external.sh @@ -61,7 +61,6 @@ # EXTERNAL_LOG_BLOCKS_BACK default: 30 (~6 min on Base Sepolia at 2s/blk) # OBOL_LLM_ENDPOINT default: http://127.0.0.1:8000/v1 # OBOL_LLM_MODEL default: qwen36-deep (27B-class) -# OBOL_LLM_NAME default: external-llm # # Exit code: 0 on PASS (every step pass), 1 on any FAIL. @@ -107,7 +106,6 @@ EXTERNAL_LOG_BLOCKS_BACK="${EXTERNAL_LOG_BLOCKS_BACK:-30}" OBOL_LLM_ENDPOINT="${OBOL_LLM_ENDPOINT:-http://127.0.0.1:8000/v1}" OBOL_LLM_MODEL="${OBOL_LLM_MODEL:-qwen36-deep}" -OBOL_LLM_NAME="${OBOL_LLM_NAME:-external-llm}" # Resolve OBOL_ROOT before sourcing helpers — lib.sh re-derives it but # operating on the canonical path simplifies later relative paths. @@ -449,7 +447,7 @@ detect_buyer_runtime bob # ───────────────────────────────────────────────────────────────── # STEP 5: Repoint LiteLLM at OBOL_LLM_ENDPOINT and add the live RPC route # ───────────────────────────────────────────────────────────────── -step "Bob: route LiteLLM via $OBOL_LLM_NAME ($OBOL_LLM_MODEL)" +step "Bob: route LiteLLM via $OBOL_LLM_MODEL ($OBOL_LLM_ENDPOINT)" if route_llm_via_obol_cli bob; then pass "LiteLLM routed via $OBOL_LLM_ENDPOINT" else diff --git a/flows/lib.sh b/flows/lib.sh index a9972f13..e1121b48 100755 --- a/flows/lib.sh +++ b/flows/lib.sh @@ -572,12 +572,10 @@ bootstrap_flow_workspace() { # OBOL_LLM_MODEL is the upstream model id (default qwen36-deep, 27B-class). # qwen36-fast (4B) is faster but flakes on long single-shot agent prompts; see # the flow-13/14 step 46 retry-wrapper rationale in lib-dual-stack.sh. -# OBOL_LLM_NAME is the LiteLLM short name registered for the endpoint (default -# external-llm). # # Sequence (all model edits use --no-sync so we trigger only one Hermes # helmfile rollout at the end): -# 1. obol model setup custom --name … --endpoint … --model … --no-sync +# 1. obol model setup custom --endpoint … --model … --no-sync # (validates the endpoint, patches LiteLLM, hot-adds the model.) # 2. obol model prefer --no-sync # (configured LiteLLM order is the primary-model contract.) @@ -587,13 +585,12 @@ bootstrap_flow_workspace() { # Each peer (alice/bob) routes independently — caller passes the runner. route_llm_via_obol_cli() { local runner=$1 - local model name + local model if [ -n "${OBOL_LLM_ENDPOINT:-}" ]; then model="${OBOL_LLM_MODEL:-qwen36-deep}" - name="${OBOL_LLM_NAME:-external-llm}" - local args=(model setup custom --no-sync --name "$name" --endpoint "$OBOL_LLM_ENDPOINT" --model "$model") + local args=(model setup custom --no-sync --endpoint "$OBOL_LLM_ENDPOINT" --model "$model") if [ -n "${OBOL_LLM_API_KEY:-}" ]; then args+=(--api-key "$OBOL_LLM_API_KEY") fi diff --git a/internal/embed/skills/monetize-guide/SKILL.md b/internal/embed/skills/monetize-guide/SKILL.md index bb342107..14b11d56 100644 --- a/internal/embed/skills/monetize-guide/SKILL.md +++ b/internal/embed/skills/monetize-guide/SKILL.md @@ -143,7 +143,7 @@ Two steps: first bridge the endpoint into LiteLLM, then sell LiteLLM. ```bash # Step A: Add the external endpoint to LiteLLM -obol model setup custom --name \ +obol model setup custom \ --endpoint \ --model "" diff --git a/internal/model/model.go b/internal/model/model.go index 3239a73b..1b8042ea 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -804,13 +804,11 @@ func RemoveModel(cfg *config.Config, u *ui.UI, modelName string) error { // because the agent then strips it and calls LiteLLM with a key that doesn't // match. // -// The `name` arg is informational only. It is surfaced via -// `obol model status` / `list` for human reference but does NOT participate -// in the LiteLLM route key. Two custom endpoints that publish the same -// `modelName` will overwrite each other in the LiteLLM ConfigMap; that is -// the natural "repoint my model" behavior an operator running -// `obol model setup custom` wants when they re-run the command. -func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, apiKey string) error { +// Two custom endpoints that publish the same `modelName` will overwrite +// each other in the LiteLLM ConfigMap; that is the natural "repoint my +// model" behavior an operator running `obol model setup custom` wants when +// they re-run the command. +func AddCustomEndpoint(cfg *config.Config, u *ui.UI, endpoint, modelName, apiKey string) error { kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") @@ -840,14 +838,7 @@ func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, entry := buildCustomEndpointEntry(modelName, clusterEndpoint, apiKey) - // Patch ConfigMap for persistence. The display label is logged so an - // operator can correlate the call with their `--name` arg, but it isn't - // part of the route key. - if name != "" { - u.Infof("Adding custom endpoint %q (model: %s) to LiteLLM config", name, modelName) - } else { - u.Infof("Adding custom endpoint (model: %s) to LiteLLM config", modelName) - } + u.Infof("Adding custom endpoint (model: %s) to LiteLLM config", modelName) if err := patchLiteLLMConfig(kubectlBinary, kubeconfigPath, []ModelEntry{entry}); err != nil { return fmt.Errorf("failed to update LiteLLM config: %w", err) @@ -856,10 +847,10 @@ func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, // Hot-add via API (no restart needed). if err := hotAddModels(cfg, u, []ModelEntry{entry}); err != nil { u.Warnf("Hot-add failed, falling back to restart: %v", err) - return RestartLiteLLM(cfg, u, name) + return RestartLiteLLM(cfg, u, modelName) } - u.Successf("Custom endpoint %q added (model: %s)", name, modelName) + u.Successf("Custom endpoint added (model: %s)", modelName) return nil }