From 17eca0878837d2e781807a8fc67b0e85f7f80ae5 Mon Sep 17 00:00:00 2001 From: skobeltsyn Date: Sat, 30 May 2026 09:48:52 +0300 Subject: [PATCH] =?UTF-8?q?feat(#2489):=20humanApproval=20=E2=80=94=20seal?= =?UTF-8?q?ed=20HumanDecision=20+=20audit=20events=20on=20top=20of=20#2488?= =?UTF-8?q?=20interrupt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2489 — second child of the HITL epic (#2487). Promotes the typed approval pattern from the #1918 demo to a runtime feature, layered on the #2488 interrupt primitive. ```kotlin tool("approve_deploy") { args -> humanApproval { title = "Deploy to production?" body = deploymentPlan // typed, @Generable or anything timeout = 30.minutes defaultOnTimeout = HumanDecision.Rejected } // throws AgentInterruptException carrying ApprovalRequest; resume // with one of the four HumanDecision variants } ``` Implementation: - core/HumanApproval.kt — new file. `ApprovalRequest(title, body, timeout, defaultOnTimeout)`. Sealed `HumanDecision { Approved, Rejected, Edited(payload), Responded(payload) }`. `ApprovalBuilder` with fail-fast on blank title. Free function `humanApproval { } : Nothing` builds the request and calls `interrupt(payload = request)`. - core/PipelineEvent.kt — two new variants: * `ApprovalRequested(title, hasBody, timeoutMs, ...)` — fires BEFORE the throw when the runtime detects an `ApprovalRequest` payload. Field-only: title + body-presence + advisory timeout. No body in the audit row (high-volume / PII-sensitive). * `ApprovalDecided(decision, hasPayload, ...)` — fires on resume when `resumeWith` is a `HumanDecision`. `decision` is the variant name; `hasPayload` flags whether Edited/Responded carried one. Payload itself stays off the audit row. Both wired through `Agent.observe { }` so JSONL audit + OTel / LangSmith / Langfuse bridges pick them up. Bridge `when` blocks updated to handle both variants (field-only, mirroring the source). 
- core/Agent.kt — new `approvalRequestedListener` + `approvalDecidedListener` listener slots, with `onApprovalRequested` / `onApprovalDecided` public DSL setters. Mirror the existing `onToolHallucinated` pattern. - model/AgenticLoop.kt: * In the `PendingInterruptSignal` catch (#2488), if the payload is an `ApprovalRequest`, fire `approvalRequestedListener` under the runtime context. * In the resume entry (#2488), if `resumeWith is HumanDecision`, fire `approvalDecidedListener` with the variant name + payload presence before synthesising the tool result. Composition: - Builds entirely on #2488 interrupt — no new state, no new exception type. `humanApproval` is sugar for `interrupt(ApprovalRequest(...))`. - Manifest-hash restore guard (#2754) applies — pinned by a dedicated test. - Resume path uses the existing `resumeWith` -> `toLlmInput` -> synthesised tool message pipeline (#2488). - Timeout is advisory; the caller honors it (the human reply happens between catch and the next `invokeSuspendResuming` call, outside any runtime suspension). `defaultOnTimeout = Rejected` is the fail-closed default for a regulated runtime. Tests (HumanApprovalTest.kt — 10 cases): - ApprovalRequest payload round-trips on AgentInterruptException - HumanDecision.Approved resumes to text completion - HumanDecision.Rejected — synthesised tool message reflects the decision - HumanDecision.Edited carries a typed @Generable payload - HumanDecision.Responded carries a free-form payload - ApprovalRequested PipelineEvent fires with field-only audit row - ApprovalDecided PipelineEvent fires on resume with HumanDecision - ApprovalDecided does NOT fire when resumeWith is a raw value (gating is type-driven; ApprovalRequested also gated on payload type) - Manifest-hash mismatch refuses to resume the approval snapshot - Blank title fails fast at the builder before interrupt is thrown Full suite: 1757 tests across 7 modules, 0 failures. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../agents_engine/langfuse/LangfuseBridge.kt | 29 ++ .../langsmith/LangSmithBridge.kt | 27 ++ .../kotlin/agents_engine/otel/OtelBridge.kt | 21 ++ src/main/kotlin/agents_engine/core/Agent.kt | 35 +++ .../agents_engine/core/HumanApproval.kt | 143 ++++++++++ .../agents_engine/core/PipelineEvent.kt | 66 +++++ .../kotlin/agents_engine/model/AgenticLoop.kt | 26 ++ .../agents_engine/core/HumanApprovalTest.kt | 265 ++++++++++++++++++ 8 files changed, 612 insertions(+) create mode 100644 src/main/kotlin/agents_engine/core/HumanApproval.kt create mode 100644 src/test/kotlin/agents_engine/core/HumanApprovalTest.kt diff --git a/agents-kt-langfuse/src/main/kotlin/agents_engine/langfuse/LangfuseBridge.kt b/agents-kt-langfuse/src/main/kotlin/agents_engine/langfuse/LangfuseBridge.kt index eb5795f..f79dc11 100644 --- a/agents-kt-langfuse/src/main/kotlin/agents_engine/langfuse/LangfuseBridge.kt +++ b/agents-kt-langfuse/src/main/kotlin/agents_engine/langfuse/LangfuseBridge.kt @@ -173,6 +173,35 @@ class LangfuseBridge internal constructor( ) } } + is PipelineEvent.ApprovalRequested -> { + // #2489 — human approval pause. Field-only (no body / PII). + mostRecentTrace()?.let { state -> + enqueueEventObservation( + trace = state, + name = "agent.approval.requested", + input = mapOf( + "title" to event.title, + "has_body" to event.hasBody, + "timeout_ms" to event.timeoutMs, + ), + metadata = metadata(event.runtimeContext), + ) + } + } + is PipelineEvent.ApprovalDecided -> { + // #2489 — pairs with ApprovalRequested via timestamp ordering. 
+ mostRecentTrace()?.let { state -> + enqueueEventObservation( + trace = state, + name = "agent.approval.decided", + input = mapOf( + "decision" to event.decision, + "has_payload" to event.hasPayload, + ), + metadata = metadata(event.runtimeContext), + ) + } + } } } diff --git a/agents-kt-langsmith/src/main/kotlin/agents_engine/langsmith/LangSmithBridge.kt b/agents-kt-langsmith/src/main/kotlin/agents_engine/langsmith/LangSmithBridge.kt index 0428421..5b155d0 100644 --- a/agents-kt-langsmith/src/main/kotlin/agents_engine/langsmith/LangSmithBridge.kt +++ b/agents-kt-langsmith/src/main/kotlin/agents_engine/langsmith/LangSmithBridge.kt @@ -161,6 +161,33 @@ class LangSmithBridge internal constructor( ) } } + is PipelineEvent.ApprovalRequested -> { + // #2489 — field-only (no body) per the audit-row PII discipline. + mostRecentAgentRun()?.let { state -> + enqueueEvent( + state, + "agent.approval.requested", + mapOf( + "title" to event.title, + "has_body" to event.hasBody, + "timeout_ms" to event.timeoutMs, + ), + ) + } + } + is PipelineEvent.ApprovalDecided -> { + // #2489 — pairs with ApprovalRequested. + mostRecentAgentRun()?.let { state -> + enqueueEvent( + state, + "agent.approval.decided", + mapOf( + "decision" to event.decision, + "has_payload" to event.hasPayload, + ), + ) + } + } } } diff --git a/agents-kt-otel/src/main/kotlin/agents_engine/otel/OtelBridge.kt b/agents-kt-otel/src/main/kotlin/agents_engine/otel/OtelBridge.kt index f7469fe..b6b9f86 100644 --- a/agents-kt-otel/src/main/kotlin/agents_engine/otel/OtelBridge.kt +++ b/agents-kt-otel/src/main/kotlin/agents_engine/otel/OtelBridge.kt @@ -100,6 +100,27 @@ class OtelBridge( .build(), ) } + is PipelineEvent.ApprovalRequested -> { + // #2489 — human approval requested; field-only (no body / PII). 
+ mostRecentAgentSpan()?.addEvent( + "agent.approval.requested", + Attributes.builder() + .put("approval.title", event.title) + .put("approval.has_body", event.hasBody) + .also { b -> event.timeoutMs?.let { b.put("approval.timeout_ms", it) } } + .build(), + ) + } + is PipelineEvent.ApprovalDecided -> { + // #2489 — the resumed HumanDecision; pairs with ApprovalRequested. + mostRecentAgentSpan()?.addEvent( + "agent.approval.decided", + Attributes.builder() + .put("approval.decision", event.decision) + .put("approval.has_payload", event.hasPayload) + .build(), + ) + } } } diff --git a/src/main/kotlin/agents_engine/core/Agent.kt b/src/main/kotlin/agents_engine/core/Agent.kt index 09389a4..497e6e2 100644 --- a/src/main/kotlin/agents_engine/core/Agent.kt +++ b/src/main/kotlin/agents_engine/core/Agent.kt @@ -168,6 +168,24 @@ class Agent( */ var toolHallucinatedListener: ((name: String, args: Map, allowedTools: List) -> Unit)? = null private set + /** + * #2489 — fires when a tool inside the agentic loop calls `humanApproval + * { }` and the runtime is about to pause for human input. Pure + * observability: the runtime still throws [AgentInterruptException]. + * Receives the rendered `title`, whether a `body` is attached, and the + * advisory `timeoutMs`. Body content is omitted by design — see + * [PipelineEvent.ApprovalRequested]. + */ + var approvalRequestedListener: ((title: String, hasBody: Boolean, timeoutMs: Long?) -> Unit)? = null + private set + /** + * #2489 — fires on the resume path when `resumeWith` is a [HumanDecision]. + * Receives the variant name and whether the variant carried a payload + * (Edited/Responded). Body content omitted by design — see + * [PipelineEvent.ApprovalDecided]. + */ + var approvalDecidedListener: ((decision: String, hasPayload: Boolean) -> Unit)? = null + private set private val tokenUsageListeners = mutableListOf<(TokenUsage) -> Unit>() var knowledgeUsedListener: ((name: String, content: String) -> Unit)? 
= null private set @@ -330,6 +348,23 @@ class Agent( toolHallucinatedListener = block } + /** + * #2489 — Observe `humanApproval { }` requests on this agent's loop. + * Settable post-construction. See [PipelineEvent.ApprovalRequested]. + */ + fun onApprovalRequested(block: (title: String, hasBody: Boolean, timeoutMs: Long?) -> Unit) { + approvalRequestedListener = block + } + + /** + * #2489 — Observe the [HumanDecision] when the resume path synthesises + * a tool result from a `resumeWith` of that type. Settable + * post-construction. See [PipelineEvent.ApprovalDecided]. + */ + fun onApprovalDecided(block: (decision: String, hasPayload: Boolean) -> Unit) { + approvalDecidedListener = block + } + /** * Observe provider-reported token usage for each successful LLM round-trip. * diff --git a/src/main/kotlin/agents_engine/core/HumanApproval.kt b/src/main/kotlin/agents_engine/core/HumanApproval.kt new file mode 100644 index 0000000..bc146e2 --- /dev/null +++ b/src/main/kotlin/agents_engine/core/HumanApproval.kt @@ -0,0 +1,143 @@ +package agents_engine.core + +import kotlin.time.Duration + +/** + * `agents_engine/core/HumanApproval.kt` — first-class human approval gate + * (#2489), built on the interrupt primitive ([interrupt] in + * [agents_engine.core.Interrupt]). Promotes the typed-approval pattern + * sketched by the #1918 demo to a runtime feature. + * + * The shape: + * + * ```kotlin + * tool("approve_deploy") { args -> + * humanApproval { + * title = "Deploy to production?" + * body = deploymentPlan // typed + * timeout = 30.minutes + * defaultOnTimeout = HumanDecision.Rejected + * } + * // ↑ never returns — throws AgentInterruptException carrying the + * // ApprovalRequest. The caller asks the human, then resumes via + * // invokeSuspendResuming(..., resumeWith = decision). + * } + * ``` + * + * **Sealed [HumanDecision].** Not a boolean. 
The four variants — + * `Approved`, `Rejected`, `Edited(payload)`, `Responded(payload)` — + * capture the four real-world outcomes for a typed approval. Edited + * carries the modified plan; Responded carries a free-form reply + * (e.g. "ask the user this clarifying question first"). + * + * **Audit events.** [Agent.observe] subscribers see two new + * [PipelineEvent] variants: [PipelineEvent.ApprovalRequested] (emitted + * by the agentic loop when the interrupt payload is an [ApprovalRequest]) + * and [PipelineEvent.ApprovalDecided] (emitted when the resume path + * synthesises a tool result from a [HumanDecision] `resumeWith`). + * Field-only — no payload bodies in the audit row, since payloads can + * be high-volume or PII-sensitive. Bridges (OTel / LangSmith / + * Langfuse) and the JSONL audit exporter pick them up via the usual + * `observe { }` seam. + * + * **Timeout.** [ApprovalRequest.timeout] and + * [ApprovalRequest.defaultOnTimeout] are advisory — the runtime can't + * honor them inside the suspension because the human reply happens + * BETWEEN `catch (AgentInterruptException)` and the next call to + * `invokeSuspendResuming(...)`. They're carried on the request so the + * caller has a contract for how to behave on expiry: when the + * configured timeout elapses without a reply, the caller should resume + * with `resumeWith = request.defaultOnTimeout`. + * + * Pairs with #2487 (HITL epic) and #2488 (interrupt primitive). + */ + +/** + * A typed request for human input. Surfaced as the payload of + * [AgentInterruptException] when `humanApproval { }` fires. + * + * @property title short prompt rendered to the human (e.g. "Deploy to + * production?"). + * @property body optional context — typed (`@Generable` or anything + * `toLlmInput`-renderable) or null. Typically the plan or artefact + * the human is reviewing. 
+ * @property timeout advisory wall-clock cap on how long the runtime + * would wait if it were managing the timer (which it isn't — see + * class header). Null = no advisory. + * @property defaultOnTimeout the decision the caller should synthesise + * if [timeout] expires without a human reply. Defaults to + * [HumanDecision.Rejected] — fail-closed for sensitive actions is + * the right default for a regulated runtime. + */ +data class ApprovalRequest( + val title: String, + val body: Any? = null, + val timeout: Duration? = null, + val defaultOnTimeout: HumanDecision = HumanDecision.Rejected, +) + +/** + * The sealed result of a human approval request. Caller passes one of + * these as `resumeWith` to `invokeSuspendResuming(...)`: + * + * - [Approved] — proceed. + * - [Rejected] — refuse. Sensitive actions should fail-closed. + * - [Edited] — the human modified the plan; `payload` carries the new + * plan (typically the same type as the original `body`). + * - [Responded] — the human gave a free-form reply (e.g. "first ask + * the user for clarification on X"); `payload` is the reply. + */ +sealed interface HumanDecision { + object Approved : HumanDecision + object Rejected : HumanDecision + data class Edited(val payload: Any?) : HumanDecision + data class Responded(val payload: Any?) : HumanDecision +} + +/** + * DSL builder for [humanApproval]. + */ +class ApprovalBuilder { + var title: String = "" + var body: Any? = null + var timeout: Duration? = null + var defaultOnTimeout: HumanDecision = HumanDecision.Rejected + + internal fun build(): ApprovalRequest { + require(title.isNotBlank()) { "humanApproval { } requires a non-blank title." } + return ApprovalRequest( + title = title, + body = body, + timeout = timeout, + defaultOnTimeout = defaultOnTimeout, + ) + } +} + +/** + * Pause the agentic loop for human approval. Throws — never returns. + * + * Equivalent to constructing an [ApprovalRequest] and calling + * [interrupt] with it. 
The agentic loop recognises the payload as an + * [ApprovalRequest] and emits [PipelineEvent.ApprovalRequested] for + * audit consumers before throwing. + * + * The caller catches [AgentInterruptException], inspects `payload as + * ApprovalRequest`, asks the human, then resumes via: + * + * ```kotlin + * agent.invokeSuspendResuming( + * input = originalInput, + * resumeFrom = exception.snapshot, + * resumeWith = HumanDecision.Approved, // or .Rejected / .Edited(...) / .Responded(...) + * ) + * ``` + * + * The model sees the [HumanDecision] rendered as JSON (via + * [agents_engine.generation.toLlmInput]) on the synthesised tool + * result message. From its perspective the round-trip is invisible. + */ +fun humanApproval(block: ApprovalBuilder.() -> Unit): Nothing { + val request = ApprovalBuilder().apply(block).build() + interrupt(payload = request) +} diff --git a/src/main/kotlin/agents_engine/core/PipelineEvent.kt b/src/main/kotlin/agents_engine/core/PipelineEvent.kt index 94391e7..3be4450 100644 --- a/src/main/kotlin/agents_engine/core/PipelineEvent.kt +++ b/src/main/kotlin/agents_engine/core/PipelineEvent.kt @@ -114,6 +114,43 @@ sealed interface PipelineEvent { val allowedTools: List, override val runtimeContext: AgentRuntimeContext = AgentRuntimeContext.currentOrNew(), ) : PipelineEvent + + /** + * #2489 — a tool inside the agentic loop called `humanApproval { }` and + * the runtime is about to pause for human input. Emitted before the + * [AgentInterruptException] is thrown, so audit consumers see the request + * on the same wall-clock ordering as the snapshot capture. Field-only + * — `title` is the rendered prompt; `hasBody` indicates whether + * additional context (typed plan, artefact) accompanied the request, + * without copying the body into the audit row (which may be high-volume + * or PII-sensitive). `timeoutMs` is the advisory wall-clock cap the + * caller should honour. 
+ */ + data class ApprovalRequested( + override val agentName: String, + override val timestamp: Instant, + val title: String, + val hasBody: Boolean, + val timeoutMs: Long?, + override val runtimeContext: AgentRuntimeContext = AgentRuntimeContext.currentOrNew(), + ) : PipelineEvent + + /** + * #2489 — the resume path observed a [HumanDecision] in `resumeWith` + * and is about to synthesise the tool result; the event fires first. + * `decision` is the simple class name of the [HumanDecision] variant + * (Approved / Rejected / Edited / Responded) — `hasPayload` indicates + * whether the Edited/Responded variant carried a non-null payload. + * The payload itself stays off the audit row (same PII discipline as + * [ApprovalRequested.hasBody]). + */ + data class ApprovalDecided( + override val agentName: String, + override val timestamp: Instant, + val decision: String, + val hasPayload: Boolean, + override val runtimeContext: AgentRuntimeContext = AgentRuntimeContext.currentOrNew(), + ) : PipelineEvent } /** @@ -133,6 +170,8 @@ sealed interface PipelineEvent { * - [PipelineEvent.ErrorOccurred] — when an exception is about to propagate out (see [Agent.onError]) * - [PipelineEvent.BudgetThreshold] — when a budget crosses [Agent.onBudgetThreshold]'s threshold * - [PipelineEvent.ToolHallucinated] — when the model emits a tool name not in the skill's allowlist (#2757) + * - [PipelineEvent.ApprovalRequested] — when a tool calls `humanApproval { }` (#2489) + * - [PipelineEvent.ApprovalDecided] — when resume synthesises a result from a [HumanDecision] (#2489) */ fun Agent<*, *>.observe(handler: (PipelineEvent) -> Unit) { val agentName = this.name @@ -208,4 +247,31 @@ fun Agent<*, *>.observe(handler: (PipelineEvent) -> Unit) { ), ) } + + val priorApprovalRequested = this.approvalRequestedListener + onApprovalRequested { title, hasBody, timeoutMs -> + priorApprovalRequested?.invoke(title, hasBody, timeoutMs) + handler( + PipelineEvent.ApprovalRequested( + agentName = agentName, 
timestamp = Instant.now(), + title = title, + hasBody = hasBody, + timeoutMs = timeoutMs, + ), + ) + } + + val priorApprovalDecided = this.approvalDecidedListener + onApprovalDecided { decision, hasPayload -> + priorApprovalDecided?.invoke(decision, hasPayload) + handler( + PipelineEvent.ApprovalDecided( + agentName = agentName, + timestamp = Instant.now(), + decision = decision, + hasPayload = hasPayload, + ), + ) + } } diff --git a/src/main/kotlin/agents_engine/model/AgenticLoop.kt b/src/main/kotlin/agents_engine/model/AgenticLoop.kt index c020f99..ffefb09 100644 --- a/src/main/kotlin/agents_engine/model/AgenticLoop.kt +++ b/src/main/kotlin/agents_engine/model/AgenticLoop.kt @@ -254,6 +254,20 @@ internal suspend fun executeAgentic( "Snapshot has pendingInterruptCallId=$pendingCallId but resumeWith was not provided. " + "Pass resumeWith = to invokeSuspendResuming / executeAgentic." } + // #2489 — if resumeWith is a HumanDecision, emit the audit + // event before synthesising the tool result. Renders the + // decision verbatim into the LLM context via toLlmInput. + if (resumeWith is agents_engine.core.HumanDecision) { + val (decisionName, hasPayload) = when (resumeWith) { + agents_engine.core.HumanDecision.Approved -> "Approved" to false + agents_engine.core.HumanDecision.Rejected -> "Rejected" to false + is agents_engine.core.HumanDecision.Edited -> "Edited" to (resumeWith.payload != null) + is agents_engine.core.HumanDecision.Responded -> "Responded" to (resumeWith.payload != null) + } + withAgentRuntimeContext(runtimeContext) { + agent.approvalDecidedListener?.invoke(decisionName, hasPayload) + } + } // toLlmInput renders @Generable typed replies as JSON; strings stay // strings; primitives stay primitives. Matches the existing // tool-result rendering path. 
The OpenAI adapter pairs tool @@ -693,6 +707,18 @@ internal suspend fun executeAgentic( // onTurnCheckpoint with the snapshot before throwing // so the caller can persist via the same wire path // as a budget Checkpoint. + // #2489 — if the payload is an ApprovalRequest (from + // humanApproval { }), fire the dedicated audit event. + val payload = signal.payload + if (payload is agents_engine.core.ApprovalRequest) { + withAgentRuntimeContext(runtimeContext) { + agent.approvalRequestedListener?.invoke( + payload.title, + payload.body != null, + payload.timeout?.inWholeMilliseconds, + ) + } + } val snapshot = agents_engine.core.SessionSnapshot( messages = messages.toList(), turns = turns, diff --git a/src/test/kotlin/agents_engine/core/HumanApprovalTest.kt b/src/test/kotlin/agents_engine/core/HumanApprovalTest.kt new file mode 100644 index 0000000..1f4d241 --- /dev/null +++ b/src/test/kotlin/agents_engine/core/HumanApprovalTest.kt @@ -0,0 +1,265 @@ +package agents_engine.core + +import agents_engine.generation.Generable +import agents_engine.generation.Guide +import agents_engine.model.LlmMessage +import agents_engine.model.LlmResponse +import agents_engine.model.ModelClient +import agents_engine.model.Tool +import agents_engine.model.ToolCall +import kotlinx.coroutines.runBlocking +import kotlin.time.Duration.Companion.minutes +import org.junit.jupiter.api.assertThrows +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +/** + * #2489 — Human approval node, layered on the #2488 interrupt primitive. + * Pins: + * + * 1. `humanApproval { ... }` throws `AgentInterruptException` whose + * payload is an `ApprovalRequest`. + * 2. The four `HumanDecision` variants round-trip through `resumeWith` + * — the model sees a JSON-rendered tool result. + * 3. `PipelineEvent.ApprovalRequested` fires before the throw with the + * title + body-presence + timeout fields (no body / PII in the event). + * 4. 
`PipelineEvent.ApprovalDecided` fires on resume when `resumeWith` + * is a `HumanDecision`. + * 5. Composes with the manifest-hash restore guard (#2754). + * 6. Blank title fails fast at the builder. + */ +class HumanApprovalTest { + + private fun approvalAgent(): Agent { + val responses = ArrayDeque() + responses.add(LlmResponse.ToolCalls(listOf(ToolCall("approve_deploy", mapOf("plan" to "deploy v42"))))) + responses.add(LlmResponse.Text("done")) + val mock = ModelClient { _ -> responses.removeFirst() } + + return agent("Approver") { + lateinit var approveDeploy: Tool, Any?> + model { ollama("t"); client = mock } + tools { + approveDeploy = tool("approve_deploy", "Approve deploy") { args -> + humanApproval { + title = "Deploy to production?" + body = args["plan"] + timeout = 30.minutes + defaultOnTimeout = HumanDecision.Rejected + } + } + } + skills { skill("s", "") { tools(approveDeploy) } } + } + } + + @Test + fun `humanApproval throws AgentInterruptException whose payload is an ApprovalRequest`() { + val a = approvalAgent() + val ex = assertThrows { a("kick off") } + val req = ex.payload as ApprovalRequest + assertEquals("Deploy to production?", req.title) + assertEquals("deploy v42", req.body) + assertEquals(30.minutes, req.timeout) + assertEquals(HumanDecision.Rejected, req.defaultOnTimeout) + } + + @Test + fun `HumanDecision Approved round-trips through resumeWith`() { + val a = approvalAgent() + val ex = assertThrows { a("kick off") } + val out = runBlocking { + a.invokeSuspendResuming( + input = "kick off", + resumeFrom = ex.snapshot, + resumeWith = HumanDecision.Approved, + ) + } + assertEquals("done", out) + } + + @Test + fun `HumanDecision Rejected round-trips and the synthesised tool message reflects it`() { + val responses = ArrayDeque() + responses.add(LlmResponse.ToolCalls(listOf(ToolCall("approve_deploy", mapOf("plan" to "deploy v42"))))) + responses.add(LlmResponse.Text("done")) + val sawMessages = mutableListOf>() + val mock = ModelClient { msgs 
-> sawMessages += msgs.toList(); responses.removeFirst() } + + val a = agent("a") { + lateinit var approve: Tool, Any?> + model { ollama("t"); client = mock } + tools { + approve = tool("approve_deploy", "Approve deploy") { _ -> + humanApproval { title = "Deploy?" } + } + } + skills { skill("s", "") { tools(approve) } } + } + + val ex = assertThrows { a("go") } + runBlocking { + a.invokeSuspendResuming( + input = "go", + resumeFrom = ex.snapshot, + resumeWith = HumanDecision.Rejected, + ) + } + + val resumeMsgs = sawMessages[1] + val toolResult = resumeMsgs.last { it.role == "tool" } + // toLlmInput renders sealed object instances via their toString or class name — + // exact rendering shouldn't matter for the test; what matters is the model sees + // SOMETHING that conveys "Rejected". + assertTrue("Rejected" in toolResult.content, "tool message must encode the decision: ${toolResult.content}") + } + + @Test + fun `HumanDecision Edited carries a typed payload`() { + val a = approvalAgent() + val ex = assertThrows { a("go") } + val edited = HumanDecision.Edited(payload = EditedPlan(steps = listOf("staging", "canary 1%", "100%"))) + + val out = runBlocking { + a.invokeSuspendResuming( + input = "go", + resumeFrom = ex.snapshot, + resumeWith = edited, + ) + } + assertEquals("done", out) + } + + @Test + fun `HumanDecision Responded carries a free-form payload`() { + val a = approvalAgent() + val ex = assertThrows { a("go") } + + val out = runBlocking { + a.invokeSuspendResuming( + input = "go", + resumeFrom = ex.snapshot, + resumeWith = HumanDecision.Responded(payload = "ask the user about rollback strategy first"), + ) + } + assertEquals("done", out) + } + + @Test + fun `ApprovalRequested PipelineEvent fires with field-only audit row (no body)`() { + val a = approvalAgent() + val events = mutableListOf() + a.observe { events += it } + + assertThrows { a("go") } + + val req = events.filterIsInstance().single() + assertEquals("Deploy to production?", req.title) + 
assertTrue(req.hasBody, "body was attached → hasBody true") + assertEquals(30.minutes.inWholeMilliseconds, req.timeoutMs) + // The audit row should NOT carry the body itself — only that one was present. + assertNotNull(req.runtimeContext.requestId) + } + + @Test + fun `ApprovalDecided fires when resume passes a HumanDecision`() { + val a = approvalAgent() + val events = mutableListOf() + a.observe { events += it } + + val ex = assertThrows { a("go") } + runBlocking { + a.invokeSuspendResuming( + input = "go", + resumeFrom = ex.snapshot, + resumeWith = HumanDecision.Edited(payload = "modified plan"), + ) + } + + val decided = events.filterIsInstance().single() + assertEquals("Edited", decided.decision) + assertTrue(decided.hasPayload) + } + + @Test + fun `ApprovalDecided does NOT fire when resume passes a non-HumanDecision value (raw interrupt path)`() { + val responses = ArrayDeque() + responses.add(LlmResponse.ToolCalls(listOf(ToolCall("ask", mapOf("q" to "q"))))) + responses.add(LlmResponse.Text("done")) + val mock = ModelClient { _ -> responses.removeFirst() } + + val a = agent("a") { + lateinit var ask: Tool, Any?> + model { ollama("t"); client = mock } + // Plain interrupt(), not humanApproval — payload is just a string. 
+ tools { ask = tool("ask", "") { _ -> interrupt(payload = "what?") } } + skills { skill("s", "") { tools(ask) } } + } + val events = mutableListOf() + a.observe { events += it } + + val ex = assertThrows { a("go") } + runBlocking { + a.invokeSuspendResuming("go", resumeFrom = ex.snapshot, resumeWith = "plain string reply") + } + + assertTrue( + events.none { it is PipelineEvent.ApprovalDecided }, + "ApprovalDecided is gated on resumeWith being a HumanDecision", + ) + // Also no ApprovalRequested for a non-humanApproval interrupt + assertTrue(events.none { it is PipelineEvent.ApprovalRequested }) + } + + @Test + fun `humanApproval composes with manifest-hash restore guard`() { + val responses = ArrayDeque() + responses.add(LlmResponse.ToolCalls(listOf(ToolCall("approve", emptyMap())))) + val mock = ModelClient { _ -> responses.removeFirst() } + + val a = agent("ManifestedApprover") { + lateinit var approve: Tool, Any?> + model { ollama("t"); client = mock } + tools { approve = tool("approve", "") { _ -> humanApproval { title = "?" } } } + skills { skill("s", "") { tools(approve) } } + }.also { it.attachManifestHash("hash-OLD") } + + val ex = assertThrows { a("go") } + + val newAgent = agent("ManifestedApprover") { + lateinit var approve: Tool, Any?> + model { ollama("t"); client = mock } + tools { approve = tool("approve", "") { _ -> humanApproval { title = "?" } } } + skills { skill("s", "") { tools(approve) } } + }.also { it.attachManifestHash("hash-NEW") } + + assertThrows { + runBlocking { + newAgent.invokeSuspendResuming( + input = "go", + resumeFrom = ex.snapshot, + resumeWith = HumanDecision.Approved, + ) + } + } + } + + @Test + fun `humanApproval with blank title fails fast at the builder`() { + // We can't reach humanApproval { title = "" } from inside a real tool + // executor because the builder throws BEFORE interrupt — so the build + // call itself surfaces the error to whoever calls it (here, directly). 
+ val ex = assertThrows { + ApprovalBuilder().apply { title = "" }.build() + } + assertTrue("title" in ex.message!!.lowercase(), "error names the missing field: ${ex.message}") + } + + @Generable("A modified deploy plan from the human reviewer.") + data class EditedPlan( + @Guide("Ordered list of deploy steps") + val steps: List, + ) +}