diff --git a/docs/configuration/hooks/index.md b/docs/configuration/hooks/index.md
index cc635644e..a4a11744b 100644
--- a/docs/configuration/hooks/index.md
+++ b/docs/configuration/hooks/index.md
@@ -259,7 +259,7 @@ In addition to the common fields, each event ships its own payload:
 | `turn_start`                | _none_ (just the common fields)                                                                                       |
 | `turn_end`                  | `agent_name`, `reason` — one of `normal`, `continue`, `steered`, `error`, `canceled`, `hook_blocked`, `loop_detected` |
 | `before_llm_call`           | `iteration` — 1-based run-loop iteration counter (the model call this hook is gating), `model_id`                    |
-| `after_llm_call`            | `agent_name`, `stop_response`, `last_user_message`, `model_id`                                                       |
+| `after_llm_call`            | `agent_name`, `stop_response`, `last_user_message`, `model_id`, `usage`, `cost`                                       |
 | `session_end`               | `reason` — one of `clear`, `logout`, `prompt_input_exit`, `other`                                                     |
 | `pre_compact`               | `source` — one of `manual`, `auto`, `overflow`, `tool_overflow`                                                       |
 | `before_compaction`         | `input_tokens`, `output_tokens`, `context_limit`, `compaction_reason` (one of `threshold`/`overflow`/`manual`)        |
@@ -281,6 +281,9 @@ Notes:
 - `prompt` is only populated for `user_prompt_submit`. Sub-sessions (transferred tasks, background agents, skills) do **not** fire this event because their kick-off message is synthesised by the runtime, not authored by the user.
 - `stop_response` carries the model's final assistant text for `stop`, `after_llm_call`, and `subagent_stop`. `last_user_message` carries the latest user message at dispatch time.
 - `model_id` is populated for `after_llm_call` (and `before_llm_call`) in the canonical `<provider>/<model>` form (e.g. `anthropic/claude-sonnet-4-5`). For harness agents, `model_id` is the harness label (e.g. `claude-code`) rather than a canonical model name — see [Coding Harnesses]({{ '/features/harnesses/' | relative_url }}).
+- `usage` and `cost` are populated for `after_llm_call` only. `usage` is the per-call token usage object (`input_tokens`, `output_tokens`, `cached_input_tokens`, `cached_write_tokens`, and `reasoning_tokens` — the last is itself omitted for non-reasoning models); the whole object is absent when the provider reported no usage. `cost` is the USD price of that one model response. For a **native model call** it is the price computed from `usage` and the model's pricing table, and equals the cost the session records for the turn: it is **absent** when the response is unpriced (no pricing data on file, or no usage) and an explicit `0` for a priced call that was free — so a present `cost` is authoritative and an absent one means "unpriced", with no need to cross-check `usage`. (For harness agents the meaning differs — see the next note.) A cost ledger can therefore record per-call spend from the payload alone, without subscribing to the runtime event channel.
+- For [harness agents]({{ '/features/harnesses/' | relative_url }}), `cost` is the harness's own reported total for the call rather than a computed price, and is present only when the harness reported a non-zero cost (some harnesses, e.g. `codex`, report token counts but no cost — those turns carry `usage` with `cost` absent, even though the recorded message stores `0`).
+- `after_llm_call` fires for **every successful** model call, including calls made inside sub-sessions (transferred tasks, background agents, skills). For those, `session_id` is the sub-session's id. Summing `cost` across `after_llm_call` events therefore captures **all priced** spend, including sub-sessions (and even sub-sessions that error before their cost is persisted); calls whose `cost` is absent (unpriced models, or harnesses that report no cost) add nothing to the sum, so track their `usage` separately if they matter. Do **not** add a separately-queried session cost total on top: the runtime's own total already recurses into and includes completed sub-session spend, so combining the two double-counts. Pick one source — the summed hook costs — as the authoritative ledger.
 - `context_limit` is `0` when the model definition is unavailable (treat `0` as "unknown", not as a real limit).
 - `approval_decision` is one of `allow`, `deny`, `canceled`. `approval_source` is a stable classifier of which step decided (e.g. `yolo`, `session_permissions_allow`, `session_permissions_deny`, `team_permissions_allow`, `team_permissions_deny`, `pre_tool_use_hook_allow`, `pre_tool_use_hook_deny`, `readonly_hint`, `user_approved`, `user_approved_session`, `user_approved_tool`, `user_rejected`, `context_canceled`).
 
@@ -552,7 +555,7 @@ The `reason` field classifies the exit:
 
 `before_llm_call` fires immediately before every model call (after `turn_start` has assembled the messages). It cannot contribute context — use `turn_start` for that — but it can **stop the run** by returning `decision: block` (or exit code 2). The built-in `max_iterations` hook implements a hard cap on top of this event.
 
-`after_llm_call` fires immediately after each successful model call, before the response is recorded into the session and tool calls are dispatched. The assistant text is in `stop_response`. Use it for response auditing, redaction logging, or quality metrics. Failed model calls fire `on_error` instead.
+`after_llm_call` fires immediately after each successful model call, before the response is recorded into the session and tool calls are dispatched. The assistant text is in `stop_response`, and `usage` and `cost` carry that call's token usage and USD cost (see the field notes above). Use it for response auditing, redaction logging, quality metrics, or a sidecar cost ledger that records per-call spend without subscribing to the runtime event channel. Failed model calls fire `on_error` instead.
 
 ### Before/After-Compaction: structured compaction control
 
diff --git a/examples/hooks.yaml b/examples/hooks.yaml
index 5ac38c416..f584b2921 100644
--- a/examples/hooks.yaml
+++ b/examples/hooks.yaml
@@ -65,6 +65,7 @@
 #   /tmp/agent-session.log         (session_start, session_end)
 #   /tmp/agent-prompts.log         (user_prompt_submit)
 #   /tmp/agent-llm-calls.log       (before_llm_call, after_llm_call)
+#   /tmp/agent-cost-ledger.csv     (after_llm_call: per-call token usage + cost)
 #   /tmp/agent-turns.log           (turn_end)
 #   /tmp/agent-tool-results.log    (post_tool_use)
 #   /tmp/agent-permissions.log     (permission_request)
@@ -277,6 +278,14 @@ agents:
       # assistant text content arrives via stop_response (matching the
       # stop event's payload). Failed calls fire on_error instead and
       # skip this event.
+      #
+      # The payload also carries this call's token usage in .usage and its
+      # computed USD cost in .cost. .cost is ABSENT for an unpriced model
+      # (test with `has("cost")`) and an explicit 0 for a priced free call,
+      # so a present cost is authoritative without checking usage. That is
+      # everything a sidecar cost ledger needs — no event-channel wiring.
+      # after_llm_call also fires for sub-session turns (each with its own
+      # session_id), so summing .cost covers all priced spend for the run.
       # ====================================================================
       after_llm_call:
         - type: command
@@ -286,6 +295,12 @@ agents:
             SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // "unknown"')
             LEN=$(echo "$INPUT" | jq -r '.stop_response // ""' | wc -c | tr -d ' ')
             echo "[$(date)] [←] $SESSION_ID llm call complete, content=$LEN chars" >> /tmp/agent-llm-calls.log
+            # Per-call cost ledger: timestamp, session, model, tokens, cost.
+            echo "$INPUT" | jq -r '[
+              (now | todateiso8601), .session_id, .model_id,
+              (.usage.input_tokens // 0), (.usage.output_tokens // 0),
+              (if has("cost") then (.cost | tostring) else "unpriced" end)
+            ] | @csv' >> /tmp/agent-cost-ledger.csv
 
       # ====================================================================
       # SESSION-END - cleanup when the session terminates.
diff --git a/pkg/hooks/types.go b/pkg/hooks/types.go
index 06e16be3f..f6a602ef0 100644
--- a/pkg/hooks/types.go
+++ b/pkg/hooks/types.go
@@ -68,6 +68,12 @@ const (
 	EventBeforeLLMCall EventType = "before_llm_call"
 	// EventAfterLLMCall fires immediately after a successful model call,
 	// before the response is recorded. Failed calls fire EventOnError.
+	// The Input carries the response text in [Input.StopResponse]
+	// (matching the stop event), the model that produced it in
+	// [Input.ModelID], and per-turn billing data in [Input.Usage] and
+	// [Input.Cost] so sidecar cost ledgers can record per-call spend
+	// from the payload alone, without subscribing to the runtime event
+	// channel.
 	EventAfterLLMCall EventType = "after_llm_call"
 	// EventSessionEnd fires when a session terminates.
 	EventSessionEnd EventType = "session_end"
@@ -293,6 +299,36 @@ type Input struct {
 	ApprovalDecision string `json:"approval_decision,omitempty"`
 	ApprovalSource   string `json:"approval_source,omitempty"`
 
+	// AfterLLMCall specific: per-turn token usage and the computed USD
+	// cost of the model response the runtime just received. Both are
+	// populated only for [EventAfterLLMCall] and are nil for every
+	// other event. They are the hook-side counterpart of the runtime's
+	// internal TokenUsageEvent and let sidecar cost ledgers record
+	// per-call spend from the payload alone.
+	//
+	// Usage is a pointer so a handler can distinguish "the provider
+	// reported no usage" (nil) from "usage was zero".
+	//
+	// Cost is a *float64 with three meaningful states, mirroring the
+	// runtime's own pricing gate (usage present AND a model definition
+	// with a pricing table):
+	//   - nil   → unpriced: the model has no pricing data on file
+	//             (unknown model ID, custom endpoint without cost
+	//             config) or the provider reported no usage. With
+	//             omitempty the "cost" key is absent on the wire.
+	//   - 0     → a priced model whose computed cost is genuinely zero
+	//             (a free call). Emitted as "cost": 0, NOT elided —
+	//             omitempty on a pointer drops only nil, never a
+	//             non-nil pointer to the zero value.
+	//   - non-0 → the priced USD cost of this single response.
+	// A handler therefore reads a present "cost" as authoritative and
+	// an absent one as "unpriced", with no need to cross-check usage.
+	// (This is deliberately a *float64, unlike [chat.Message.Cost],
+	// which is a plain float64 with omitempty and so cannot distinguish
+	// a free priced call from an unpriced one on the wire.)
+	Usage *chat.Usage `json:"usage,omitempty"`
+	Cost  *float64    `json:"cost,omitempty"`
+
 	// Compaction fields (BeforeCompaction, AfterCompaction).
 	InputTokens  int64 `json:"input_tokens,omitempty"`
 	OutputTokens int64 `json:"output_tokens,omitempty"`
diff --git a/pkg/runtime/after_llm_call_test.go b/pkg/runtime/after_llm_call_test.go
index 2f0f519d8..4eb8c6045 100644
--- a/pkg/runtime/after_llm_call_test.go
+++ b/pkg/runtime/after_llm_call_test.go
@@ -2,6 +2,9 @@ package runtime
 
 import (
 	"context"
+	"encoding/json"
+	"os"
+	stdruntime "runtime"
 	"sync/atomic"
 	"testing"
 
@@ -9,12 +12,29 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/chat"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/hooks"
+	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/session"
 	"github.com/docker/docker-agent/pkg/team"
 )
 
+// mockModelStoreWithCost returns a model carrying a fixed pricing
+// table so after_llm_call can compute a non-nil per-turn cost. The
+// zero mockModelStore returns a nil model, which exercises the
+// unpriced (nil cost) path instead.
+type mockModelStoreWithCost struct {
+	ModelStore
+
+	cost modelsdev.Cost
+}
+
+func (m mockModelStoreWithCost) GetModel(_ context.Context, _ modelsdev.ID) (*modelsdev.Model, error) {
+	c := m.cost
+	return &modelsdev.Model{Cost: &c}, nil
+}
+
 // TestAfterLLMCallHook_PopulatesModelID is a regression test for the
 // doc/impl mismatch where [hooks.Input.ModelID] is documented as
 // populated for after_llm_call but executeAfterLLMCallHooks never
@@ -74,3 +94,244 @@ func TestAfterLLMCallHook_PopulatesModelID(t *testing.T) {
 		"after_llm_call payload must include the canonical model id; "+
 			"see pkg/hooks/types.go:177-186 for the documented contract")
 }
+
+// captureAfterLLMCall runs a single successful turn against the given
+// model store and returns the after_llm_call payload the runtime
+// dispatched, together with the session so callers can cross-check the
+// hook cost against what the session recorded. Usage is fixed at 10
+// input / 5 output tokens so callers can assert an exact computed cost.
+func captureAfterLLMCall(t *testing.T, store ModelStore) (*hooks.Input, *session.Session) {
+	t.Helper()
+
+	const hookName = "test-after-llm-usage-cost"
+
+	var captured atomic.Pointer[hooks.Input]
+
+	stream := newStreamBuilder().
+		AddContent("ok").
+		AddStopWithUsage(10, 5).
+		Build()
+	prov := &mockProvider{id: "test/mock-model", stream: stream}
+
+	root := agent.New("root", "test agent",
+		agent.WithModel(prov),
+		agent.WithHooks(&latest.HooksConfig{
+			AfterLLMCall: []latest.HookDefinition{
+				{Type: "builtin", Command: hookName},
+			},
+		}),
+	)
+	tm := team.New(team.WithAgents(root))
+
+	rt, err := NewLocalRuntime(tm,
+		WithSessionCompaction(false),
+		WithModelStore(store),
+	)
+	require.NoError(t, err)
+
+	require.NoError(t, rt.hooksRegistry.RegisterBuiltin(
+		hookName,
+		func(_ context.Context, in *hooks.Input, _ []string) (*hooks.Output, error) {
+			snap := *in
+			captured.Store(&snap)
+			return nil, nil
+		},
+	))
+
+	sess := session.New(session.WithUserMessage("hi"))
+	sess.Title = "Unit Test"
+
+	for range rt.RunStream(t.Context(), sess) {
+	}
+
+	got := captured.Load()
+	require.NotNil(t, got, "after_llm_call hook must fire on a successful turn")
+	return got, sess
+}
+
+// TestAfterLLMCallHook_PopulatesUsageAndCost pins the priced-call
+// contract: when the model has a pricing table, after_llm_call carries
+// the provider's token usage and a non-nil Cost equal to the value the
+// runtime records on the assistant message (same computeMessageCost
+// call, threaded to both).
+func TestAfterLLMCallHook_PopulatesUsageAndCost(t *testing.T) {
+	t.Parallel()
+
+	rate := modelsdev.Cost{Input: 2.0, Output: 4.0}
+	in, sess := captureAfterLLMCall(t, mockModelStoreWithCost{cost: rate})
+
+	require.NotNil(t, in.Usage, "Usage must be populated on after_llm_call")
+	assert.Equal(t, int64(10), in.Usage.InputTokens)
+	assert.Equal(t, int64(5), in.Usage.OutputTokens)
+
+	// Same arithmetic as computeMessageCost. The integer-valued inputs make
+	// the sum exact; InDelta (not strict equality) is the float-safe check.
+	expected := (float64(10)*rate.Input + float64(5)*rate.Output) / 1e6
+	require.NotNil(t, in.Cost, "Cost must be non-nil for a priced model")
+	assert.InDelta(t, expected, *in.Cost, 1e-9,
+		"hook Cost must equal computeMessageCost(usage, model)")
+
+	// The headline guarantee: the cost the hook reports is the same
+	// cost the session bills for the turn. OwnCost sums the recorded
+	// assistant message's Cost, set from the same computeMessageCost
+	// value threaded into recordAssistantMessage.
+	assert.InDelta(t, *in.Cost, sess.OwnCost(), 1e-9,
+		"hook Cost must equal the cost the session recorded for the turn")
+}
+
+// TestAfterLLMCallHook_CostNilWhenUnpriced pins the unpriced contract:
+// when the model has no pricing data (the zero mockModelStore returns a
+// nil model), Usage is still populated but Cost is nil — the signal a
+// sidecar reads as "this model is unpriced", distinct from a priced
+// free call (a non-nil pointer to 0).
+func TestAfterLLMCallHook_CostNilWhenUnpriced(t *testing.T) {
+	t.Parallel()
+
+	in, _ := captureAfterLLMCall(t, mockModelStore{})
+
+	require.NotNil(t, in.Usage,
+		"Usage must still be populated even when the model is unpriced")
+	assert.Equal(t, int64(10), in.Usage.InputTokens)
+	assert.Nil(t, in.Cost,
+		"Cost must be nil for an unpriced model so handlers can "+
+			"distinguish it from a priced free call (pointer to 0)")
+}
+
+// TestAfterLLMCallInput_CostJSONContract pins the wire format sidecar
+// scripts depend on. With Cost as a *float64 + omitempty:
+//   - nil   → the "cost" key is absent (unpriced),
+//   - &0    → "cost": 0 is present, NOT elided (priced free call —
+//     omitempty drops only nil pointers, never a pointer to 0),
+//   - &N    → "cost": N.
+//
+// The same nil-omitted rule applies to Usage, keeping every non-
+// after_llm_call event's payload free of spurious cost/usage keys.
+func TestAfterLLMCallInput_CostJSONContract(t *testing.T) {
+	t.Parallel()
+
+	marshalKeys := func(in *hooks.Input) map[string]any {
+		b, err := json.Marshal(in)
+		require.NoError(t, err)
+		var m map[string]any
+		require.NoError(t, json.Unmarshal(b, &m))
+		return m
+	}
+
+	t.Run("unpriced omits cost and usage", func(t *testing.T) {
+		t.Parallel()
+		m := marshalKeys(&hooks.Input{HookEventName: hooks.EventAfterLLMCall})
+		_, hasCost := m["cost"]
+		_, hasUsage := m["usage"]
+		assert.False(t, hasCost, "nil Cost must be omitted, not emitted as null")
+		assert.False(t, hasUsage, "nil Usage must be omitted")
+	})
+
+	t.Run("priced free call emits explicit zero", func(t *testing.T) {
+		t.Parallel()
+		zero := 0.0
+		m := marshalKeys(&hooks.Input{
+			HookEventName: hooks.EventAfterLLMCall,
+			Usage:         &chat.Usage{InputTokens: 1, OutputTokens: 1},
+			Cost:          &zero,
+		})
+		raw, hasCost := m["cost"]
+		require.True(t, hasCost,
+			"a non-nil pointer to 0 must emit \"cost\": 0, not be elided — "+
+				"this is what distinguishes a free priced call from an unpriced model")
+		assert.InDelta(t, float64(0), raw, 1e-9)
+		_, hasUsage := m["usage"]
+		assert.True(t, hasUsage, "Usage must be present when set")
+	})
+
+	t.Run("priced call emits the value", func(t *testing.T) {
+		t.Parallel()
+		v := 0.0125
+		m := marshalKeys(&hooks.Input{HookEventName: hooks.EventAfterLLMCall, Cost: &v})
+		assert.InDelta(t, 0.0125, m["cost"], 1e-9)
+	})
+}
+
+// TestAfterLLMCallHook_HarnessUsageWithoutCostIsUnpriced pins the
+// harness cost gate. The codex harness reports token counts via
+// turn.completed but never a cost, so the harness library's
+// TotalCostUSD defaults to 0. That 0 must be treated as unpriced (nil
+// cost on the hook), NOT as a free priced call (cost 0) — otherwise a
+// cost ledger would record a real, billed harness turn as $0.
+func TestAfterLLMCallHook_HarnessUsageWithoutCostIsUnpriced(t *testing.T) {
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	const hookName = "test-after-llm-harness-cost"
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "codex", `#!/bin/sh
+printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"harness done"}}'
+printf '%s\n' '{"type":"turn.completed","usage":{"input_tokens":120,"output_tokens":30}}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH"))
+
+	var captured atomic.Pointer[hooks.Input]
+
+	root := agent.New("root", "You are an external coder.",
+		agent.WithHarness(&latest.HarnessConfig{Type: "codex"}),
+		agent.WithHooks(&latest.HooksConfig{
+			AfterLLMCall: []latest.HookDefinition{{Type: "builtin", Command: hookName}},
+		}),
+	)
+	rt, err := NewLocalRuntime(team.New(team.WithAgents(root)),
+		WithSessionCompaction(false), WithModelStore(mockModelStore{}))
+	require.NoError(t, err)
+
+	require.NoError(t, rt.hooksRegistry.RegisterBuiltin(
+		hookName,
+		func(_ context.Context, in *hooks.Input, _ []string) (*hooks.Output, error) {
+			snap := *in
+			captured.Store(&snap)
+			return nil, nil
+		},
+	))
+
+	sess := session.New(session.WithUserMessage("do the task"))
+	sess.Title = "Harness Unit Test"
+	for range rt.RunStream(t.Context(), sess) {
+	}
+
+	in := captured.Load()
+	require.NotNil(t, in, "after_llm_call must fire for a harness turn")
+	require.NotNil(t, in.Usage, "harness usage must be forwarded to the hook")
+	assert.Equal(t, int64(120), in.Usage.InputTokens)
+	assert.Equal(t, int64(30), in.Usage.OutputTokens)
+	assert.Nil(t, in.Cost,
+		"a harness that reports no cost must yield nil cost (unpriced), not 0 (free)")
+}
+
+// TestComputeMessageCost unit-tests the single cost-arithmetic source
+// shared by the persisted message and the after_llm_call payload,
+// including every branch that yields nil (unpriced).
+func TestComputeMessageCost(t *testing.T) {
+	t.Parallel()
+
+	rate := &modelsdev.Cost{Input: 2.0, Output: 4.0, CacheRead: 1.0, CacheWrite: 5.0}
+
+	t.Run("nil usage is unpriced", func(t *testing.T) {
+		t.Parallel()
+		assert.Nil(t, computeMessageCost(nil, &modelsdev.Model{Cost: rate}))
+	})
+	t.Run("nil model is unpriced", func(t *testing.T) {
+		t.Parallel()
+		assert.Nil(t, computeMessageCost(&chat.Usage{InputTokens: 1}, nil))
+	})
+	t.Run("model without pricing table is unpriced", func(t *testing.T) {
+		t.Parallel()
+		assert.Nil(t, computeMessageCost(&chat.Usage{InputTokens: 1}, &modelsdev.Model{}))
+	})
+	t.Run("priced computes from all token classes", func(t *testing.T) {
+		t.Parallel()
+		usage := &chat.Usage{InputTokens: 10, OutputTokens: 5, CachedInputTokens: 4, CacheWriteTokens: 2}
+		got := computeMessageCost(usage, &modelsdev.Model{Cost: rate})
+		require.NotNil(t, got)
+		expected := (10*rate.Input + 5*rate.Output + 4*rate.CacheRead + 2*rate.CacheWrite) / 1e6
+		assert.InDelta(t, expected, *got, 1e-9)
+	})
+}
diff --git a/pkg/runtime/harness.go b/pkg/runtime/harness.go
index c48b380f8..f2de19cb0 100644
--- a/pkg/runtime/harness.go
+++ b/pkg/runtime/harness.go
@@ -189,7 +189,20 @@ func (r *LocalRuntime) runHarnessAgent(ctx context.Context, sess *session.Sessio
 		content = strings.TrimSpace(finalResult)
 	}
 
-	r.executeAfterLLMCallHooks(ctx, sess, a, modelID, content)
+	// A harness reports its own TotalCostUSD, which the harness
+	// library defaults to 0 whenever the harness output omits a cost
+	// (e.g. the codex harness never reports one). That 0 is
+	// indistinguishable from a genuinely free call, so — to avoid
+	// telling a cost ledger that a billed turn was free — surface cost
+	// only when the harness reported a non-zero value and leave it nil
+	// (unpriced) otherwise. This keeps the wire contract honest: a
+	// present cost is always a real reported figure.
+	var hookCost *float64
+	if cost != 0 {
+		c := cost
+		hookCost = &c
+	}
+	r.executeAfterLLMCallHooks(ctx, sess, a, modelID, content, usage, hookCost)
 	r.recordHarnessAssistantMessage(sess, a, content, modelID, usage, cost, events)
 	r.executeStopHooks(ctx, sess, a, content, events)
 
diff --git a/pkg/runtime/hooks.go b/pkg/runtime/hooks.go
index cca582d90..4431afb04 100644
--- a/pkg/runtime/hooks.go
+++ b/pkg/runtime/hooks.go
@@ -441,15 +441,22 @@ func (r *LocalRuntime) executeBeforeLLMCallHooks(
 // model call, before the response is recorded into the session and
 // tool calls are dispatched. The assistant text content is passed via
 // stop_response (matching the stop event), so handlers can reuse the
-// same parsing logic. Failed model calls fire on_error instead and
-// skip this event.
-func (r *LocalRuntime) executeAfterLLMCallHooks(ctx context.Context, sess *session.Session, a *agent.Agent, modelID, responseContent string) {
+// same parsing logic. The per-turn token usage and computed USD cost
+// are forwarded via [hooks.Input.Usage] and [hooks.Input.Cost] so
+// sidecar cost ledgers can record per-call spend from the payload
+// alone. cost is a *float64 so an unpriced model (nil) is distinct on
+// the wire from a priced free call (a pointer to 0); the caller owns
+// that distinction. Failed model calls fire on_error instead and skip
+// this event.
+func (r *LocalRuntime) executeAfterLLMCallHooks(ctx context.Context, sess *session.Session, a *agent.Agent, modelID, responseContent string, usage *chat.Usage, cost *float64) {
 	r.dispatchHook(ctx, a, hooks.EventAfterLLMCall, &hooks.Input{
 		SessionID:       sess.ID,
 		AgentName:       a.Name(),
 		ModelID:         modelID,
 		StopResponse:    responseContent,
 		LastUserMessage: sess.GetLastUserMessageContent(),
+		Usage:           usage,
+		Cost:            cost,
 	}, nil)
 }
 
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
index 84b66129a..3548ffd4a 100644
--- a/pkg/runtime/loop.go
+++ b/pkg/runtime/loop.go
@@ -373,8 +373,8 @@ func (r *LocalRuntime) runStreamLoop(ctx context.Context, sess *session.Session,
 		// the actual inference context), then falls back to the models.dev
 		// catalogue. The lookup above is reused inside resolveContextLimit
 		// only when context_size isn't supplied; we keep the explicit call
-		// here because m is also threaded into [recordAssistantMessage] for
-		// per-message cost computation.
+		// here because m is also passed to [computeMessageCost] for
+		// per-turn cost computation.
 		contextLimit := r.resolveContextLimit(ctx, model, modelID)
 		if contextLimit > 0 && r.sessionCompaction && compaction.ShouldCompact(sess.InputTokens, sess.OutputTokens, 0, contextLimit) {
 			r.compactWithReason(ctx, sess, "", compactionReasonThreshold, sink)
@@ -565,11 +565,20 @@ func (r *LocalRuntime) runTurn(
 	// A successful model call resets the overflow compaction counter.
 	ls.overflowCompactions = 0
 
+	// Compute the per-turn cost once, here, so the exact same value
+	// reaches both the after_llm_call hook payload and the recorded
+	// assistant message — the hook's cost is therefore guaranteed to
+	// equal the cost the session bills for this turn. It is nil when
+	// the turn cannot be priced (no usage, or a model with no pricing
+	// table); see computeMessageCost.
+	msgCost := computeMessageCost(res.Usage, m)
+
 	// after_llm_call hooks fire on success only; failed calls
 	// fire on_error above. The assistant text content is passed
 	// via stop_response, matching the stop event's payload, so
-	// handlers can reuse the same parsing.
-	r.executeAfterLLMCallHooks(ctx, sess, a, modelID.String(), res.Content)
+	// handlers can reuse the same parsing. Usage and Cost carry the
+	// per-turn billing data for sidecar cost ledgers.
+	r.executeAfterLLMCallHooks(ctx, sess, a, modelID.String(), res.Content, res.Usage, msgCost)
 
 	if usedModel != nil && usedModel.ID() != model.ID() {
 		slog.InfoContext(ctx, "Used fallback model", "agent", a.Name(), "primary", model.ID().String(), "used", usedModel.ID().String())
@@ -583,7 +592,7 @@ func (r *LocalRuntime) runTurn(
 	endStreamSpan()
 	slog.DebugContext(ctx, "Stream processed", "agent", a.Name(), "tool_calls", len(res.Calls), "content_length", len(res.Content), "stopped", res.Stopped)
 
-	msgUsage := r.recordAssistantMessage(sess, a, res, agentTools, modelID.String(), m, events)
+	msgUsage := r.recordAssistantMessage(sess, a, res, agentTools, modelID.String(), msgCost, events)
 
 	usage := SessionUsage(sess, contextLimit)
 	usage.LastMessage = msgUsage
@@ -701,16 +710,39 @@ func (r *LocalRuntime) Run(ctx context.Context, sess *session.Session) ([]sessio
 	return sess.GetAllMessages(), nil
 }
 
+// computeMessageCost returns the USD cost of a single model response,
+// or nil when the response cannot be priced. It is nil when there is
+// no usage to price (usage == nil) or the model has no pricing table
+// (m == nil — e.g. an unknown model ID or a custom endpoint without
+// cost config — or m.Cost == nil). A non-nil result of 0 therefore
+// means "priced, but this call was free", distinct from "unpriced"
+// (nil). This single arithmetic source feeds both the persisted
+// assistant message (which records a nil cost as 0) and the
+// after_llm_call hook payload (which keeps the nil/0 distinction), so
+// the two can never disagree.
+func computeMessageCost(usage *chat.Usage, m *modelsdev.Model) *float64 {
+	if usage == nil || m == nil || m.Cost == nil {
+		return nil
+	}
+	cost := (float64(usage.InputTokens)*m.Cost.Input +
+		float64(usage.OutputTokens)*m.Cost.Output +
+		float64(usage.CachedInputTokens)*m.Cost.CacheRead +
+		float64(usage.CacheWriteTokens)*m.Cost.CacheWrite) / 1e6 // NOTE(review): rates are presumably USD per 1M tokens — confirm
+	return &cost
+}
+
 // recordAssistantMessage adds the model's response to the session and returns
 // per-message usage information for the token-usage event. Empty responses
 // (no text and no tool calls) are silently skipped since providers reject them.
+// cost is the precomputed per-turn cost (see computeMessageCost); nil records
+// as 0, matching the previous "no pricing data" behaviour.
 func (r *LocalRuntime) recordAssistantMessage(
 	sess *session.Session,
 	a *agent.Agent,
 	res streamResult,
 	agentTools []tools.Tool,
 	modelID string,
-	m *modelsdev.Model,
+	cost *float64,
 	events EventSink,
 ) *MessageUsage {
 	if strings.TrimSpace(res.Content) == "" && len(res.Calls) == 0 {
@@ -732,13 +764,12 @@ func (r *LocalRuntime) recordAssistantMessage(
 		}
 	}
 
-	// Calculate per-message cost when pricing information is available.
+	// The per-turn cost was computed once in runTurn and threaded in;
+	// nil means the response could not be priced and records as 0,
+	// preserving the previous "no pricing data" behaviour.
 	var messageCost float64
-	if res.Usage != nil && m != nil && m.Cost != nil {
-		messageCost = (float64(res.Usage.InputTokens)*m.Cost.Input +
-			float64(res.Usage.OutputTokens)*m.Cost.Output +
-			float64(res.Usage.CachedInputTokens)*m.Cost.CacheRead +
-			float64(res.Usage.CacheWriteTokens)*m.Cost.CacheWrite) / 1e6
+	if cost != nil {
+		messageCost = *cost
 	}
 
 	messageModel := modelID