BackendStack21
diff --git a/‎cmd/odek/audit.go‎
Lines changed: 139 additions & 0 deletions b/‎cmd/odek/audit.go‎
Lines changed: 139 additions & 0 deletions
diff --git a/‎cmd/odek/audit_cmd_test.go‎
Lines changed: 207 additions & 0 deletions b/‎cmd/odek/audit_cmd_test.go‎
Lines changed: 207 additions & 0 deletions
@@ -0,0 +1,139 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/BackendStack21/odek/internal/llm"
+	"github.com/BackendStack21/odek/internal/session"
+)
+
+// recordTurnAudit writes one audit-log entry describing a finished agent
+// turn: the names of the tools invoked, the resources seen in tool
+// arguments and tool results that the user message did not mention (as
+// computed by session.NovelResources), and whether any tool result
+// carried the untrusted-content wrapper.
+//
+// A turn is marked as a suspicious divergence only when both signals are
+// present: untrusted content was ingested AND novel resources (URLs,
+// paths, dotted names) appeared. Novel resources alone are not flagged,
+// because a trusted internal search legitimately surfaces things the
+// user never named. The combination is the footprint expected from a
+// prompt injection that steered the agent toward an attacker-chosen
+// resource.
+//
+// A nil store makes the call a no-op.
+func recordTurnAudit(store *session.AuditStore, sessionID string, turn int, userText string, newMsgs []llm.Message) {
+	if store == nil {
+		return
+	}
+
+	var (
+		calledTools []string
+		referenced  strings.Builder // tool arguments and tool outputs, space separated
+		tainted     bool
+	)
+	for _, msg := range newMsgs {
+		for _, call := range msg.ToolCalls {
+			calledTools = append(calledTools, call.Function.Name)
+			referenced.WriteString(call.Function.Arguments + " ")
+		}
+		if msg.Role != "tool" {
+			continue
+		}
+		referenced.WriteString(msg.Content + " ")
+		if hasUntrustedWrapper(msg.Content) {
+			tainted = true
+		}
+	}
+
+	novel := session.NovelResources(userText, referenced.String())
+
+	// Recording is best-effort: the RecordTurn error is deliberately
+	// dropped.
+	_ = store.RecordTurn(sessionID, session.AuditTurn{
+		Turn:              turn,
+		UserMessage:       userText,
+		ToolCalls:         calledTools,
+		NovelResources:    novel,
+		IngestedUntrusted: tainted,
+		// Divergence is only meaningful on tainted turns.
+		SuspiciousDivergence: tainted && len(novel) > 0,
+	})
+}
+
+// auditCmd handles `odek audit <session-id>`, `odek audit --list` and the
+// help flags (--help, -h, help). It only reads the audit log, never
+// writes it.
+//
+// With a session ID the log is printed to stdout as indented JSON so the
+// caller can pipe it through jq or a similar tool. --list delegates to
+// auditList, which prints a plain-text table instead.
+//
+// It returns an error when no argument is given (after printing usage),
+// when the session store cannot be opened, or when loading or marshalling
+// the log fails. Every error is prefixed with "audit:".
+func auditCmd(args []string) error {
+	if len(args) == 0 {
+		printAuditUsage()
+		return fmt.Errorf("audit: argument required")
+	}
+
+	// Answer help requests before touching the session store, so usage is
+	// still available when the store cannot be initialised.
+	switch args[0] {
+	case "--help", "-h", "help":
+		printAuditUsage()
+		return nil
+	}
+
+	store, err := session.NewStore()
+	if err != nil {
+		return fmt.Errorf("audit: session store: %w", err)
+	}
+	auditStore := session.NewAuditStore(store.Dir())
+
+	if args[0] == "--list" {
+		return auditList(store, auditStore)
+	}
+
+	// Anything else is treated as a session ID.
+	log, err := auditStore.Load(args[0])
+	if err != nil {
+		return fmt.Errorf("audit: load: %w", err)
+	}
+	out, err := json.MarshalIndent(log, "", "  ")
+	if err != nil {
+		return fmt.Errorf("audit: marshal: %w", err)
+	}
+	fmt.Println(string(out))
+	return nil
+}
+
+func auditList(store *session.Store, auditStore *session.AuditStore) error {
+	sessions, err := store.List(0)
+	if err != nil {
+		return fmt.Errorf("audit: list sessions: %w", err)
+	}
+	fmt.Fprintf(os.Stderr, "Session                Ingests  Turns  Suspicious  First-Ingest-Source\n")
+	for _, s := range sessions {
+		log, err := auditStore.Load(s.ID)
+		if err != nil || len(log.Ingests) == 0 {
+			continue
+		}
+		suspicious := 0
+		for _, t := range log.Turns {
+			if t.SuspiciousDivergence {
+				suspicious++
+			}
+		}
+		firstSource := log.Ingests[0].Source
+		if len(firstSource) > 40 {
+			firstSource = firstSource[:37] + "..."
+		}
+		fmt.Printf("%-22s %7d %6d %11d  %s\n",
+			s.ID, len(log.Ingests), len(log.Turns), suspicious, firstSource)
+	}
+	return nil
+}
+
+func printAuditUsage() {
+	fmt.Println(`Usage: odek audit <session-id>
+       odek audit --list
+
+Prints the prompt-injection audit log for a session.
+
+The log records every time the agent ingested externally-sourced
+content (a fetched page, a file outside the working directory, an MCP
+tool response, audio transcript, etc.) along with a per-turn
+divergence assessment — turns where the agent referenced resources
+the user did not mention AND the session ingested untrusted content
+are flagged as 'suspicious'.
+
+Output is JSON to stdout.`)
+}
@@ -0,0 +1,207 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/BackendStack21/odek/internal/session"
+)
+
+// captureStderrDuring runs fn while stderr is being captured and returns
+// the text the capture yields. It wraps the existing captureStderr
+// helper (from sandbox_test.go), whose returned closure flushes the
+// capture.
+func captureStderrDuring(t *testing.T, fn func()) string {
+	t.Helper()
+	collect := captureStderr(t)
+	fn()
+	captured := collect()
+	return captured
+}
+
+// withTempHome points HOME at a fresh per-test temporary directory, so
+// that session.NewStore writes under a sandbox path, and returns that
+// directory.
+func withTempHome(t *testing.T) string {
+	t.Helper()
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	return home
+}
+
+// TestPrintAuditUsage_OutputsKeyTokens checks that the usage text
+// contains the key tokens a reader of the help output would look for.
+func TestPrintAuditUsage_OutputsKeyTokens(t *testing.T) {
+	usage := captureStdout(printAuditUsage)
+	required := []string{
+		"odek audit",
+		"<session-id>",
+		"--list",
+		"audit log",
+		"suspicious",
+		"JSON",
+	}
+	for _, token := range required {
+		if strings.Contains(usage, token) {
+			continue
+		}
+		t.Errorf("usage missing %q\noutput:\n%s", token, usage)
+	}
+}
+
+func TestAuditCmd_NoArgs_PrintsUsageAndErrors(t *testing.T) {
+	withTempHome(t)
+	out := captureStdout(func() {
+		err := auditCmd(nil)
+		if err == nil {
+			t.Error("expected error when called with no args")
+		} else if !strings.Contains(err.Error(), "argument required") {
+			t.Errorf("error = %v, want 'argument required'", err)
+		}
+	})
+	if !strings.Contains(out, "odek audit") {
+		t.Errorf("usage should have been printed, got:\n%s", out)
+	}
+}
+
+func TestAuditCmd_Help_PrintsUsage(t *testing.T) {
+	withTempHome(t)
+	for _, flag := range []string{"--help", "-h", "help"} {
+		t.Run(flag, func(t *testing.T) {
+			out := captureStdout(func() {
+				if err := auditCmd([]string{flag}); err != nil {
+					t.Fatalf("auditCmd(%q): %v", flag, err)
+				}
+			})
+			if !strings.Contains(out, "odek audit") {
+				t.Errorf("usage missing from %q output:\n%s", flag, out)
+			}
+		})
+	}
+}
+
+func TestAuditCmd_LoadByID_NoSuchSession(t *testing.T) {
+	withTempHome(t)
+	// AuditStore.Load returns empty AuditLog when the file is missing,
+	// not an error — so auditCmd should succeed and print an empty log.
+	out := captureStdout(func() {
+		if err := auditCmd([]string{"20260529-deadbe"}); err != nil {
+			t.Fatalf("auditCmd: %v", err)
+		}
+	})
+	// Empty log marshals with a session_id field set but ingests/turns null.
+	if !strings.Contains(out, "\"session_id\"") {
+		t.Errorf("expected JSON with session_id key, got:\n%s", out)
+	}
+	if !strings.Contains(out, "20260529-deadbe") {
+		t.Errorf("expected the session ID echoed in the JSON, got:\n%s", out)
+	}
+}
+
+// TestAuditCmd_LoadByID_InvalidID checks that a path-traversal style ID
+// is rejected with an error carrying the "audit:" prefix.
+func TestAuditCmd_LoadByID_InvalidID(t *testing.T) {
+	withTempHome(t)
+	err := auditCmd([]string{"../etc/passwd"})
+	switch {
+	case err == nil:
+		t.Fatal("expected error for path-traversal ID")
+	case !strings.Contains(err.Error(), "audit:"):
+		t.Errorf("error should be wrapped with 'audit:', got: %v", err)
+	}
+}
+
+func TestAuditCmd_List_EmptyStore(t *testing.T) {
+	withTempHome(t)
+	// No sessions yet → header on stderr, no rows on stdout, no error.
+	stderr := captureStderrDuring(t, func() {
+		if err := auditCmd([]string{"--list"}); err != nil {
+			t.Fatalf("auditCmd --list: %v", err)
+		}
+	})
+	if !strings.Contains(stderr, "Session") || !strings.Contains(stderr, "Ingests") {
+		t.Errorf("expected header on stderr, got:\n%s", stderr)
+	}
+}
+
+// TestAuditCmd_LoadByID_RoundtripWithRecorded records an ingest and a
+// turn through the AuditStore, then checks that auditCmd dumps them
+// back as JSON.
+func TestAuditCmd_LoadByID_RoundtripWithRecorded(t *testing.T) {
+	withTempHome(t)
+
+	// Build a real audit log on disk for auditCmd to load.
+	store, err := session.NewStore()
+	if err != nil {
+		t.Fatalf("NewStore: %v", err)
+	}
+	auditStore := session.NewAuditStore(store.Dir())
+
+	const sid = "20260529-abc001"
+	err = auditStore.RecordIngest(sid, 1, "https://example.com", "hello")
+	if err != nil {
+		t.Fatalf("RecordIngest: %v", err)
+	}
+	turn := session.AuditTurn{
+		Turn:                 1,
+		UserMessage:          "do thing",
+		ToolCalls:            []string{"shell"},
+		IngestedUntrusted:    true,
+		SuspiciousDivergence: false,
+	}
+	err = auditStore.RecordTurn(sid, turn)
+	if err != nil {
+		t.Fatalf("RecordTurn: %v", err)
+	}
+
+	var cmdErr error
+	dump := captureStdout(func() {
+		cmdErr = auditCmd([]string{sid})
+	})
+	if cmdErr != nil {
+		t.Fatalf("auditCmd: %v", cmdErr)
+	}
+
+	expected := []string{
+		sid,
+		"https://example.com",
+		`"ingested_untrusted": true`,
+		`"tool_calls"`,
+	}
+	for _, fragment := range expected {
+		if strings.Contains(dump, fragment) {
+			continue
+		}
+		t.Errorf("audit dump missing %q\noutput:\n%s", fragment, dump)
+	}
+}
+
+func TestAuditList_PopulatedSession(t *testing.T) {
+	withTempHome(t)
+
+	store, err := session.NewStore()
+	if err != nil {
+		t.Fatalf("NewStore: %v", err)
+	}
+	// Save a real session so store.List returns something.
+	sess := session.Session{
+		ID:    "20260529-listme",
+		Task:  "test",
+		Turns: 1,
+	}
+	if err := store.Save(&sess); err != nil {
+		t.Fatalf("Save session: %v", err)
+	}
+
+	auditStore := session.NewAuditStore(store.Dir())
+	// Long source string to exercise the truncation branch.
+	longSource := strings.Repeat("a", 60)
+	if err := auditStore.RecordIngest(sess.ID, 1, longSource, "data"); err != nil {
+		t.Fatalf("RecordIngest: %v", err)
+	}
+	if err := auditStore.RecordTurn(sess.ID, session.AuditTurn{
+		Turn:                 1,
+		IngestedUntrusted:    true,
+		SuspiciousDivergence: true,
+	}); err != nil {
+		t.Fatalf("RecordTurn: %v", err)
+	}
+
+	// auditList writes the header to stderr and the rows to stdout, so
+	// capture both. The order is: open stderr capture, then run the
+	// stdout capture (which executes the function).
+	flushStderr := captureStderr(t)
+	stdout := captureStdout(func() {
+		if err := auditList(store, auditStore); err != nil {
+			t.Fatalf("auditList: %v", err)
+		}
+	})
+	stderr := flushStderr()
+	combined := stderr + stdout
+
+	if !strings.Contains(combined, sess.ID) {
+		t.Errorf("auditList should list session %q\nstderr:\n%s\nstdout:\n%s", sess.ID, stderr, stdout)
+	}
+	if !strings.Contains(combined, "...") {
+		t.Errorf("long source should have been truncated with '...'\nstderr:\n%s\nstdout:\n%s", stderr, stdout)
+	}
+}