From 15b23181d51edf7597c7ca0ad20ff217e77c96fb Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:30:33 +0000
Subject: [PATCH 1/6] feat(memory): add inject_memories config fields (#3014)

- Add InjectMemories, MaxInjectMemories, InjectMemoriesStrategy fields to AgentConfig
- Add InjectMemoriesStrategyLocal constant and DefaultMaxInjectMemories = 10
- Add validateInjectMemories() method called from Config.Validate()
- Add inject_memories, max_inject_memories, inject_memories_strategy to agent-schema.json
  (only 'local' strategy shipped; 'llm' planned for a future release)
- Add table-driven tests covering valid and invalid configurations
---
 agent-schema.json                             | 23 +++++-
 pkg/config/latest/types.go                    | 47 +++++++++---
 pkg/config/latest/validate.go                 | 30 ++++++++
 .../latest/validate_inject_memories_test.go   | 75 +++++++++++++++++++
 4 files changed, 162 insertions(+), 13 deletions(-)
 create mode 100644 pkg/config/latest/validate_inject_memories_test.go

diff --git a/agent-schema.json b/agent-schema.json
index 374c82346..fae43b75e 100644
--- a/agent-schema.json
+++ b/agent-schema.json
@@ -94,7 +94,7 @@
     },
     "runtime": {
       "$ref": "#/definitions/RuntimeDefaults",
-      "description": "Execution-time defaults the agent author wants applied. Values act as defaults only — explicit CLI flags or user-config settings always win."
+      "description": "Execution-time defaults the agent author wants applied. Values act as defaults only \u2014 explicit CLI flags or user-config settings always win."
     }
   },
   "additionalProperties": false,
@@ -614,6 +614,25 @@
             "type": "string"
           }
         },
+        "inject_memories": {
+          "type": "boolean",
+          "default": false,
+          "description": "Opt the agent into automatic memory retrieval at the start of every turn. When true, the runtime fetches relevant memories from the agent's configured memory toolset and injects them into the conversation as a transient system message (never persisted). Requires a memory toolset to be configured on the agent; otherwise the runtime emits a warning and the hook is a no-op."
+        },
+        "max_inject_memories": {
+          "type": "integer",
+          "minimum": 0,
+          "default": 10,
+          "description": "Maximum number of memories injected per turn. 0 means use the default (10)."
+        },
+        "inject_memories_strategy": {
+          "type": "string",
+          "enum": [
+            "local"
+          ],
+          "default": "local",
+          "description": "Retrieval strategy. 'local' scores memories with an in-process BM25 ranker against the latest user message (cheap, deterministic, no extra model call). Note: an 'llm' strategy is planned for a future release."
+        },
         "commands": {
           "description": "Named prompts for /commands. Supports simple string format or advanced object format with description and instruction.",
           "oneOf": [
@@ -1586,7 +1605,7 @@
       "properties": {
         "sandbox": {
           "type": "boolean",
-          "description": "When true, run the agent inside a Docker sandbox by default — equivalent to passing --sandbox on the command line. An explicit --sandbox=false on the CLI still wins."
+          "description": "When true, run the agent inside a Docker sandbox by default \u2014 equivalent to passing --sandbox on the command line. An explicit --sandbox=false on the CLI still wins."
         },
         "network_allowlist": {
           "type": "array",
diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go
index f722daf73..077836aaa 100644
--- a/pkg/config/latest/types.go
+++ b/pkg/config/latest/types.go
@@ -18,6 +18,17 @@ import (
 
 const Version = "10"
 
+const (
+	// InjectMemoriesStrategyLocal scores memories with an in-process BM25
+	// ranker against the latest user message. Cheap, deterministic, never
+	// calls the model.
+	InjectMemoriesStrategyLocal = "local"
+
+	// DefaultMaxInjectMemories is the default cap when MaxInjectMemories
+	// is unset or zero.
+	DefaultMaxInjectMemories = 10
+)
+
 // Config represents the entire configuration file
 type Config struct {
 	Version   string                    `json:"version,omitempty"`
@@ -446,17 +457,31 @@ type AgentConfig struct {
 	// Pointer (tri-state) so we can distinguish "unset" (nil → default
 	// on) from "explicitly disabled" (false). Use
 	// [AgentConfig.RedactSecretsEnabled] to read the effective value.
-	RedactSecrets           *bool             `json:"redact_secrets,omitempty"`
-	CodeModeTools           bool              `json:"code_mode_tools,omitempty"`
-	AddDescriptionParameter bool              `json:"add_description_parameter,omitempty"`
-	MaxIterations           int               `json:"max_iterations,omitempty"`
-	MaxConsecutiveToolCalls int               `json:"max_consecutive_tool_calls,omitempty"`
-	MaxOldToolCallTokens    int               `json:"max_old_tool_call_tokens,omitempty"`
-	NumHistoryItems         int               `json:"num_history_items,omitempty"`
-	AddPromptFiles          []string          `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"`
-	Commands                types.Commands    `json:"commands,omitempty"`
-	StructuredOutput        *StructuredOutput `json:"structured_output,omitempty"`
-	Skills                  SkillsConfig      `json:"skills,omitzero"`
+	RedactSecrets           *bool    `json:"redact_secrets,omitempty"`
+	CodeModeTools           bool     `json:"code_mode_tools,omitempty"`
+	AddDescriptionParameter bool     `json:"add_description_parameter,omitempty"`
+	MaxIterations           int      `json:"max_iterations,omitempty"`
+	MaxConsecutiveToolCalls int      `json:"max_consecutive_tool_calls,omitempty"`
+	MaxOldToolCallTokens    int      `json:"max_old_tool_call_tokens,omitempty"`
+	NumHistoryItems         int      `json:"num_history_items,omitempty"`
+	AddPromptFiles          []string `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"`
+
+	// InjectMemories opts the agent into automatic memory retrieval at the
+	// start of every turn. The runtime fetches relevant memories from the
+	// agent's configured memory toolset and injects them as a transient
+	// system message (never persisted to the session).
+	//
+	// Requires a memory toolset to be configured on the agent.
+	// MaxInjectMemories caps the number of memories returned; defaults to
+	// DefaultMaxInjectMemories when zero. InjectMemoriesStrategy selects
+	// the retrieval strategy (see InjectMemoriesStrategy* constants).
+	InjectMemories         bool   `json:"inject_memories,omitempty" yaml:"inject_memories,omitempty"`
+	MaxInjectMemories      int    `json:"max_inject_memories,omitempty" yaml:"max_inject_memories,omitempty"`
+	InjectMemoriesStrategy string `json:"inject_memories_strategy,omitempty" yaml:"inject_memories_strategy,omitempty"`
+
+	Commands         types.Commands    `json:"commands,omitempty"`
+	StructuredOutput *StructuredOutput `json:"structured_output,omitempty"`
+	Skills           SkillsConfig      `json:"skills,omitzero"`
 	// UseCommands and UseSkills reference reusable groups defined in the
 	// top-level Config.Commands / Config.Skills sections. The referenced
 	// groups are merged into Commands / Skills during config resolution;
diff --git a/pkg/config/latest/validate.go b/pkg/config/latest/validate.go
index 2d2243142..549b5b9df 100644
--- a/pkg/config/latest/validate.go
+++ b/pkg/config/latest/validate.go
@@ -49,6 +49,9 @@ func (t *Config) Validate() error {
 				return err
 			}
 		}
+		if err := agent.validateInjectMemories(); err != nil {
+			return err
+		}
 		if agent.Hooks != nil {
 			if err := agent.Hooks.Validate(); err != nil {
 				return err
@@ -170,6 +173,33 @@ func (a *AgentConfig) validateHarness() error {
 	return nil
 }
 
+// validateInjectMemories validates the inject_memories family of fields.
+func (a *AgentConfig) validateInjectMemories() error {
+	if !a.InjectMemories {
+		// The companion fields are tolerated when inject_memories is
+		// false — matches the convention for max_iterations et al.
+		return nil
+	}
+
+	switch a.InjectMemoriesStrategy {
+	case "", InjectMemoriesStrategyLocal:
+		// ok; the runtime treats "" as "local" when the hook runs.
+	default:
+		return fmt.Errorf("agent %q: inject_memories_strategy %q is invalid (expected %q)",
+			a.Name, a.InjectMemoriesStrategy, InjectMemoriesStrategyLocal)
+	}
+
+	if a.MaxInjectMemories < 0 {
+		return fmt.Errorf("agent %q: max_inject_memories must be >= 0 (got %d)",
+			a.Name, a.MaxInjectMemories)
+	}
+
+	// Toolset presence is not validated here — config validation has no
+	// toolset semantics. The runtime emits a warning and falls back to a
+	// no-op when the memory toolset is missing.
+	return nil
+}
+
 func (t *Toolset) validate() error {
 	// Attributes used on the wrong toolset type.
 	if len(t.Shell) > 0 && t.Type != "script" {
diff --git a/pkg/config/latest/validate_inject_memories_test.go b/pkg/config/latest/validate_inject_memories_test.go
new file mode 100644
index 000000000..31ab6249c
--- /dev/null
+++ b/pkg/config/latest/validate_inject_memories_test.go
@@ -0,0 +1,75 @@
+package latest
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestValidateInjectMemories(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name    string
+		cfg     AgentConfig
+		wantErr string
+	}{
+		{
+			name: "disabled with no companion fields is valid",
+			cfg:  AgentConfig{Name: "agent"},
+		},
+		{
+			name: "disabled with strategy set is valid",
+			cfg:  AgentConfig{Name: "agent", InjectMemoriesStrategy: InjectMemoriesStrategyLocal},
+		},
+		{
+			name: "disabled with max set is valid",
+			cfg:  AgentConfig{Name: "agent", MaxInjectMemories: 5},
+		},
+		{
+			name: "enabled with empty strategy is valid (defaults to local)",
+			cfg:  AgentConfig{Name: "agent", InjectMemories: true},
+		},
+		{
+			name: "enabled with local strategy is valid",
+			cfg:  AgentConfig{Name: "agent", InjectMemories: true, InjectMemoriesStrategy: InjectMemoriesStrategyLocal},
+		},
+		{
+			name: "enabled with max_inject_memories zero is valid",
+			cfg:  AgentConfig{Name: "agent", InjectMemories: true, MaxInjectMemories: 0},
+		},
+		{
+			name: "enabled with positive max is valid",
+			cfg:  AgentConfig{Name: "agent", InjectMemories: true, MaxInjectMemories: 20},
+		},
+		{
+			name:    "enabled with invalid strategy is rejected",
+			cfg:     AgentConfig{Name: "myagent", InjectMemories: true, InjectMemoriesStrategy: "bogus"},
+			wantErr: `agent "myagent": inject_memories_strategy "bogus" is invalid`,
+		},
+		{
+			name:    "enabled with negative max is rejected",
+			cfg:     AgentConfig{Name: "myagent", InjectMemories: true, MaxInjectMemories: -1},
+			wantErr: `agent "myagent": max_inject_memories must be >= 0 (got -1)`,
+		},
+		{
+			name:    "enabled with llm strategy is rejected (not yet shipped)",
+			cfg:     AgentConfig{Name: "myagent", InjectMemories: true, InjectMemoriesStrategy: "llm"},
+			wantErr: `agent "myagent": inject_memories_strategy "llm" is invalid`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			err := tt.cfg.validateInjectMemories()
+			if tt.wantErr != "" {
+				require.Error(t, err)
+				assert.Contains(t, err.Error(), tt.wantErr)
+			} else {
+				require.NoError(t, err)
+			}
+		})
+	}
+}

From 925d1dd636564919b2ef51f4e1ba62faf64eb2e7 Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:31:49 +0000
Subject: [PATCH 2/6] feat(memory): scaffold inject_memories turn_start builtin
 (#3015)

- Add BuiltinInjectMemories constant and injectMemoriesBuiltin method on LocalRuntime
- Register builtin in NewLocalRuntime alongside cache_response
- No-op implementation; retrieval logic lands in #3013
- Add unit test for no-op return contract
---
 pkg/runtime/inject_memories.go      | 25 +++++++++++++++++++++++++
 pkg/runtime/inject_memories_test.go | 23 +++++++++++++++++++++++
 pkg/runtime/runtime.go              |  3 +++
 3 files changed, 51 insertions(+)
 create mode 100644 pkg/runtime/inject_memories.go
 create mode 100644 pkg/runtime/inject_memories_test.go

diff --git a/pkg/runtime/inject_memories.go b/pkg/runtime/inject_memories.go
new file mode 100644
index 000000000..6f866021c
--- /dev/null
+++ b/pkg/runtime/inject_memories.go
@@ -0,0 +1,25 @@
+package runtime
+
+import (
+	"context"
+
+	"github.com/docker/docker-agent/pkg/hooks"
+)
+
+// BuiltinInjectMemories is the name of the turn_start builtin that
+// retrieves relevant memories at the start of every turn and injects
+// them into the conversation as a transient system message.
+//
+// Like cache_response (see cache.go) the builtin is registered on the
+// runtime's hooks registry as a closure so it can resolve the agent
+// (and therefore its memory toolset and snapshot cache) by name from
+// [hooks.Input.AgentName].
+const BuiltinInjectMemories = "inject_memories"
+
+// injectMemoriesBuiltin is the turn_start builtin entry point.
+// The actual retrieval logic lands in a later commit; this skeleton
+// keeps the registration valid so applyInjectMemoriesDefault can wire
+// the hook entry without LookupBuiltin failing.
+func (r *LocalRuntime) injectMemoriesBuiltin(_ context.Context, _ *hooks.Input, _ []string) (*hooks.Output, error) {
+	return nil, nil
+}
diff --git a/pkg/runtime/inject_memories_test.go b/pkg/runtime/inject_memories_test.go
new file mode 100644
index 000000000..6b2c05b29
--- /dev/null
+++ b/pkg/runtime/inject_memories_test.go
@@ -0,0 +1,23 @@
+package runtime
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/hooks"
+)
+
+// TestInjectMemoriesBuiltin_ReturnsNilWhenNoopStub verifies that the scaffold
+// implementation is a no-op: it returns (nil, nil) so the hook pipeline
+// treats it as contributing no additional context.
+func TestInjectMemoriesBuiltin_ReturnsNilWhenNoopStub(t *testing.T) {
+	t.Parallel()
+
+	rt := &LocalRuntime{}
+	out, err := rt.injectMemoriesBuiltin(context.Background(), &hooks.Input{AgentName: "a"}, nil)
+	require.NoError(t, err)
+	assert.Nil(t, out)
+}
diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go
index 95f2e0f11..45d64122f 100644
--- a/pkg/runtime/runtime.go
+++ b/pkg/runtime/runtime.go
@@ -549,6 +549,9 @@ func NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) {
 	if err := r.hooksRegistry.RegisterBuiltin(BuiltinCacheResponse, r.cacheResponseBuiltin); err != nil {
 		return nil, fmt.Errorf("register %q builtin: %w", BuiltinCacheResponse, err)
 	}
+	if err := r.hooksRegistry.RegisterBuiltin(BuiltinInjectMemories, r.injectMemoriesBuiltin); err != nil {
+		return nil, fmt.Errorf("register %q builtin: %w", BuiltinInjectMemories, err)
+	}
 
 	// Build the cooldown manager and wire the fallback executor's
 	// runtime-bound dependencies after opts so they pick up the final

From 9ba4b026b1378709b20f6ef97c14e3eacdde7c13 Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:34:30 +0000
Subject: [PATCH 3/6] feat(memory): wire inject_memories through runtime
 (#3016)

- Add injectMemories/maxInjectMemories fields and accessors to Agent
- Add WithInjectMemories opt to pkg/agent/opts.go
- Wire WithInjectMemories in teamloader alongside other agent flags
- Add applyInjectMemoriesDefault() mirroring applyCacheDefault pattern
- Add effectiveMaxInjectMemories() fallback helper
- Wire applyInjectMemoriesDefault into buildHooksExecutors
- Populate AgentName and LastUserMessage in executeTurnStartHooks input
- Expand tests: applyInjectMemoriesDefault and effectiveMaxInjectMemories
---
 pkg/agent/agent.go                  | 10 +++++
 pkg/agent/opts.go                   |  7 ++++
 pkg/runtime/hooks.go                |  5 ++-
 pkg/runtime/inject_memories.go      | 31 +++++++++++++++
 pkg/runtime/inject_memories_test.go | 58 +++++++++++++++++++++++++++++
 pkg/teamloader/teamloader.go        |  4 ++
 6 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go
index cac2d65f0..fb79895d4 100644
--- a/pkg/agent/agent.go
+++ b/pkg/agent/agent.go
@@ -40,6 +40,8 @@ type Agent struct {
 	maxOldToolCallTokens    int
 	numHistoryItems         int
 	addPromptFiles          []string
+	injectMemories          bool
+	maxInjectMemories       int
 	tools                   []tools.Tool
 	commands                types.Commands
 	harness                 *latest.HarnessConfig
@@ -116,6 +118,14 @@ func (a *Agent) AddPromptFiles() []string {
 	return a.addPromptFiles
 }
 
+func (a *Agent) InjectMemories() bool {
+	return a.injectMemories
+}
+
+func (a *Agent) MaxInjectMemories() int {
+	return a.maxInjectMemories
+}
+
 // Description returns the agent's description
 func (a *Agent) Description() string {
 	return a.description
diff --git a/pkg/agent/opts.go b/pkg/agent/opts.go
index 0b7e19232..56998317b 100644
--- a/pkg/agent/opts.go
+++ b/pkg/agent/opts.go
@@ -132,6 +132,13 @@ func WithAddPromptFiles(addPromptFiles []string) Opt {
 	}
 }
 
+func WithInjectMemories(enabled bool, maxMemories int) Opt {
+	return func(a *Agent) {
+		a.injectMemories = enabled
+		a.maxInjectMemories = maxMemories
+	}
+}
+
 func WithMaxIterations(maxIterations int) Opt {
 	return func(a *Agent) {
 		a.maxIterations = maxIterations
diff --git a/pkg/runtime/hooks.go b/pkg/runtime/hooks.go
index faf5ad1f2..faf3469a4 100644
--- a/pkg/runtime/hooks.go
+++ b/pkg/runtime/hooks.go
@@ -42,6 +42,7 @@ func (r *LocalRuntime) buildHooksExecutors() {
 		})
 		cfg = applyAutoInjectors(cfg, r.autoInjectors)
 		cfg = applyCacheDefault(cfg, a)
+		cfg = applyInjectMemoriesDefault(cfg, a)
 		if cfg == nil {
 			continue
 		}
@@ -153,7 +154,9 @@ func (r *LocalRuntime) executeSessionStartHooks(ctx context.Context, sess *sessi
 // contents of a prompt file the user might be editing mid-session.
 func (r *LocalRuntime) executeTurnStartHooks(ctx context.Context, sess *session.Session, a *agent.Agent, events EventSink) []chat.Message {
 	return contextMessages(r.dispatchHook(ctx, a, hooks.EventTurnStart, &hooks.Input{
-		SessionID: sess.ID,
+		SessionID:       sess.ID,
+		AgentName:       a.Name(),
+		LastUserMessage: sess.GetLastUserMessageContent(),
 	}, events))
 }
 
diff --git a/pkg/runtime/inject_memories.go b/pkg/runtime/inject_memories.go
index 6f866021c..55e7118a4 100644
--- a/pkg/runtime/inject_memories.go
+++ b/pkg/runtime/inject_memories.go
@@ -3,6 +3,8 @@ package runtime
 import (
 	"context"
 
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/hooks"
 )
 
@@ -16,6 +18,35 @@ import (
 // [hooks.Input.AgentName].
 const BuiltinInjectMemories = "inject_memories"
 
+// applyInjectMemoriesDefault appends the inject_memories turn_start hook to
+// cfg when the agent has inject_memories enabled. Mirrors the role of
+// [applyCacheDefault] for the cache_response stop hook.
+//
+// The helper accepts (and may return) a nil cfg so callers can chain it
+// after [builtins.ApplyAgentDefaults] without an extra branch.
+func applyInjectMemoriesDefault(cfg *hooks.Config, a *agent.Agent) *hooks.Config {
+	if !a.InjectMemories() {
+		return cfg
+	}
+	if cfg == nil {
+		cfg = &hooks.Config{}
+	}
+	cfg.TurnStart = append(cfg.TurnStart, hooks.Hook{
+		Type:    hooks.HookTypeBuiltin,
+		Command: BuiltinInjectMemories,
+	})
+	return cfg
+}
+
+// effectiveMaxInjectMemories returns the configured cap, falling back to
+// [latest.DefaultMaxInjectMemories] when the agent value is zero.
+func effectiveMaxInjectMemories(a *agent.Agent) int {
+	if n := a.MaxInjectMemories(); n > 0 {
+		return n
+	}
+	return latest.DefaultMaxInjectMemories
+}
+
 // injectMemoriesBuiltin is the turn_start builtin entry point.
 // The actual retrieval logic lands in a later commit; this skeleton
 // keeps the registration valid so applyInjectMemoriesDefault can wire
diff --git a/pkg/runtime/inject_memories_test.go b/pkg/runtime/inject_memories_test.go
index 6b2c05b29..fc73d8919 100644
--- a/pkg/runtime/inject_memories_test.go
+++ b/pkg/runtime/inject_memories_test.go
@@ -7,6 +7,8 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/hooks"
 )
 
@@ -21,3 +23,59 @@ func TestInjectMemoriesBuiltin_ReturnsNilWhenNoopStub(t *testing.T) {
 	require.NoError(t, err)
 	assert.Nil(t, out)
 }
+
+// TestApplyInjectMemoriesDefault verifies that the turn_start hook entry is
+// appended only when inject_memories is enabled on the agent.
+func TestApplyInjectMemoriesDefault(t *testing.T) {
+	t.Parallel()
+
+	t.Run("disabled leaves cfg unchanged", func(t *testing.T) {
+		t.Parallel()
+		a := agent.New("a", "")
+		got := applyInjectMemoriesDefault(nil, a)
+		assert.Nil(t, got)
+	})
+
+	t.Run("enabled appends hook to nil cfg", func(t *testing.T) {
+		t.Parallel()
+		a := agent.New("a", "", agent.WithInjectMemories(true, 5))
+		got := applyInjectMemoriesDefault(nil, a)
+		require.NotNil(t, got)
+		require.Len(t, got.TurnStart, 1)
+		assert.Equal(t, hooks.HookTypeBuiltin, got.TurnStart[0].Type)
+		assert.Equal(t, BuiltinInjectMemories, got.TurnStart[0].Command)
+	})
+
+	t.Run("enabled appends hook to existing cfg", func(t *testing.T) {
+		t.Parallel()
+		a := agent.New("a", "", agent.WithInjectMemories(true, 0))
+		existing := &hooks.Config{
+			TurnStart: []hooks.Hook{{Type: hooks.HookTypeBuiltin, Command: "add_date"}},
+		}
+		got := applyInjectMemoriesDefault(existing, a)
+		require.Len(t, got.TurnStart, 2)
+		assert.Equal(t, BuiltinInjectMemories, got.TurnStart[1].Command)
+	})
+}
+
+// TestEffectiveMaxInjectMemories verifies the fallback to DefaultMaxInjectMemories.
+func TestEffectiveMaxInjectMemories(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		max  int
+		want int
+	}{
+		{name: "zero uses default", max: 0, want: latest.DefaultMaxInjectMemories},
+		{name: "positive value used as-is", max: 3, want: 3},
+		{name: "large value used as-is", max: 100, want: 100},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			a := agent.New("a", "", agent.WithInjectMemories(true, tt.max))
+			assert.Equal(t, tt.want, effectiveMaxInjectMemories(a))
+		})
+	}
+}
diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go
index c55ee63ea..fcb22a215 100644
--- a/pkg/teamloader/teamloader.go
+++ b/pkg/teamloader/teamloader.go
@@ -176,6 +176,10 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c
 			agent.WithAddDescriptionParameter(agentConfig.AddDescriptionParameter),
 			agent.WithRedactSecrets(agentConfig.RedactSecretsEnabled()),
 			agent.WithAddPromptFiles(promptFiles),
+			agent.WithInjectMemories(
+				agentConfig.InjectMemories,
+				agentConfig.MaxInjectMemories,
+			),
 			agent.WithMaxIterations(agentConfig.MaxIterations),
 			agent.WithMaxConsecutiveToolCalls(agentConfig.MaxConsecutiveToolCalls),
 			agent.WithMaxOldToolCallTokens(agentConfig.MaxOldToolCallTokens),

From 701b01e5f79485d6924cf4b7c96a684dc10d3b05 Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:42:37 +0000
Subject: [PATCH 4/6] feat(memory): implement inject_memories retrieval
 pipeline (#3013)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add InjectMemoriesStrategy field/accessor to Agent; update WithInjectMemories
- Add DB() and SetDB() accessors to memory.ToolSet
- Add inject_memories_bm25.go: self-contained BM25 ranker (k1=1.5, b=0.75)
  matching pkg/rag/strategy/bm25.go tokenisation for consistency
- Implement injectMemoriesBuiltin with 'local' strategy:
  GetMemories → BM25 rank → XML inject as AdditionalContext
- lookupMemoryDB resolves first memory toolset from agent.ToolSets()
- formatMemoriesXML wraps hits in <memories>...</memories> with XML escaping
- Degrade gracefully: no toolset, empty DB, zero hits all return nil
- Add BM25 unit tests and full pipeline integration tests with fake DB
---
 pkg/agent/agent.go                       |   5 +
 pkg/agent/opts.go                        |   3 +-
 pkg/runtime/inject_memories.go           | 126 +++++++++++-
 pkg/runtime/inject_memories_bm25.go      | 147 ++++++++++++++
 pkg/runtime/inject_memories_bm25_test.go | 134 +++++++++++++
 pkg/runtime/inject_memories_test.go      | 242 ++++++++++++++++++++++-
 pkg/teamloader/teamloader.go             |   1 +
 pkg/tools/builtin/memory/memory.go       |  10 +
 8 files changed, 651 insertions(+), 17 deletions(-)
 create mode 100644 pkg/runtime/inject_memories_bm25.go
 create mode 100644 pkg/runtime/inject_memories_bm25_test.go

diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go
index fb79895d4..ceac8bb70 100644
--- a/pkg/agent/agent.go
+++ b/pkg/agent/agent.go
@@ -42,6 +42,7 @@ type Agent struct {
 	addPromptFiles          []string
 	injectMemories          bool
 	maxInjectMemories       int
+	injectMemoriesStrategy  string
 	tools                   []tools.Tool
 	commands                types.Commands
 	harness                 *latest.HarnessConfig
@@ -126,6 +127,10 @@ func (a *Agent) MaxInjectMemories() int {
 	return a.maxInjectMemories
 }
 
+func (a *Agent) InjectMemoriesStrategy() string {
+	return a.injectMemoriesStrategy
+}
+
 // Description returns the agent's description
 func (a *Agent) Description() string {
 	return a.description
diff --git a/pkg/agent/opts.go b/pkg/agent/opts.go
index 56998317b..2e837e941 100644
--- a/pkg/agent/opts.go
+++ b/pkg/agent/opts.go
@@ -132,10 +132,11 @@ func WithAddPromptFiles(addPromptFiles []string) Opt {
 	}
 }
 
-func WithInjectMemories(enabled bool, maxMemories int) Opt {
+func WithInjectMemories(enabled bool, maxMemories int, strategy string) Opt {
 	return func(a *Agent) {
 		a.injectMemories = enabled
 		a.maxInjectMemories = maxMemories
+		a.injectMemoriesStrategy = strategy
 	}
 }
 
diff --git a/pkg/runtime/inject_memories.go b/pkg/runtime/inject_memories.go
index 55e7118a4..23de92496 100644
--- a/pkg/runtime/inject_memories.go
+++ b/pkg/runtime/inject_memories.go
@@ -1,11 +1,18 @@
 package runtime
 
 import (
+	"bytes"
 	"context"
+	"encoding/xml"
+	"log/slog"
+	"strings"
 
 	"github.com/docker/docker-agent/pkg/agent"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/hooks"
+	"github.com/docker/docker-agent/pkg/memory/database"
+	"github.com/docker/docker-agent/pkg/tools"
+	memory "github.com/docker/docker-agent/pkg/tools/builtin/memory"
 )
 
 // BuiltinInjectMemories is the name of the turn_start builtin that
@@ -47,10 +54,117 @@ func effectiveMaxInjectMemories(a *agent.Agent) int {
 	return latest.DefaultMaxInjectMemories
 }
 
-// injectMemoriesBuiltin is the turn_start builtin entry point.
-// The actual retrieval logic lands in a later commit; this skeleton
-// keeps the registration valid so applyInjectMemoriesDefault can wire
-// the hook entry without LookupBuiltin failing.
-func (r *LocalRuntime) injectMemoriesBuiltin(_ context.Context, _ *hooks.Input, _ []string) (*hooks.Output, error) {
-	return nil, nil
+// injectMemoriesBuiltin retrieves relevant memories at the start of a
+// turn and emits them as turn_start AdditionalContext, wrapped in a
+// stable <memories>...</memories> XML block so the model sees a clean,
+// machine-parseable section even after compaction strips the surrounding
+// system message.
+//
+// Only the "local" strategy is implemented. It ranks all stored memories
+// with an in-process BM25 scorer against the last user message — cheap,
+// deterministic, and requires no extra model call.
+//
+// No-op (returns nil, nil) when:
+//   - input is nil, or AgentName/LastUserMessage is empty;
+//   - the agent cannot be resolved or has no memory toolset;
+//   - GetMemories fails or the strategy is unknown (a warning is logged);
+//   - retrieval returns zero hits, which includes an empty memory DB.
+func (r *LocalRuntime) injectMemoriesBuiltin(ctx context.Context, in *hooks.Input, _ []string) (*hooks.Output, error) {
+	if in == nil || in.AgentName == "" || in.LastUserMessage == "" {
+		return nil, nil
+	}
+	a, err := r.team.Agent(in.AgentName)
+	if err != nil || a == nil {
+		return nil, nil
+	}
+
+	db, ok := r.lookupMemoryDB(a)
+	if !ok {
+		// No memory toolset on this agent; inject_memories is a config
+		// error but we degrade gracefully at runtime.
+		slog.WarnContext(ctx, "inject_memories: no memory toolset found for agent",
+			"agent", a.Name())
+		return nil, nil
+	}
+
+	limit := effectiveMaxInjectMemories(a)
+
+	strategy := a.InjectMemoriesStrategy()
+	if strategy == "" {
+		strategy = latest.InjectMemoriesStrategyLocal
+	}
+
+	var hits []database.UserMemory
+	switch strategy {
+	case latest.InjectMemoriesStrategyLocal:
+		all, err := db.GetMemories(ctx)
+		if err != nil {
+			slog.WarnContext(ctx, "inject_memories: GetMemories failed",
+				"agent", a.Name(), "error", err)
+			return nil, nil
+		}
+		hits = bm25Rank(all, in.LastUserMessage, limit)
+	default:
+		// Unknown strategy — validation should have caught this at
+		// config load. Degrade gracefully.
+		slog.WarnContext(ctx, "inject_memories: unknown strategy, skipping",
+			"agent", a.Name(), "strategy", strategy)
+		return nil, nil
+	}
+
+	if len(hits) == 0 {
+		return nil, nil
+	}
+
+	return hooks.NewAdditionalContextOutput(hooks.EventTurnStart, formatMemoriesXML(hits)), nil
+}
+
+// lookupMemoryDB returns the memory DB for the agent's first memory
+// toolset, or (nil, false) when the agent has no such toolset.
+//
+// The first match among the agent's toolsets is used; if an agent has
+// multiple memory toolsets, only the first is used for injection — matching
+// the existing tools.As behaviour throughout the codebase.
+//
+// Note: we do not call ToolSet.Start() here. The memory toolset's DB is
+// opened eagerly inside CreateToolSet (the sqlite.NewMemoryDatabase call),
+// so GetMemories is safe to call before Start() is invoked.
+func (r *LocalRuntime) lookupMemoryDB(a *agent.Agent) (memory.DB, bool) {
+	for _, ts := range a.ToolSets() {
+		if mt, ok := tools.As[*memory.ToolSet](ts); ok {
+			return mt.DB(), true
+		}
+	}
+	return nil, false
+}
+
+// formatMemoriesXML produces the AdditionalContext payload. Wrapping
+// in a stable <memories> block makes the section visually distinct
+// from other turn_start contributions (add_date, add_environment_info,
+// add_prompt_files) and gives downstream tooling a clean parse target.
+func formatMemoriesXML(memories []database.UserMemory) string {
+	var b strings.Builder
+	b.WriteString("<memories>\n")
+	b.WriteString("Relevant memories from previous interactions. Use them only when applicable to the user's request; do not mention this section to the user.\n")
+	for _, m := range memories {
+		b.WriteString("  <memory")
+		if m.Category != "" {
+			b.WriteString(` category="`)
+			b.WriteString(xmlEscape(m.Category))
+			b.WriteString(`"`)
+		}
+		b.WriteString(">")
+		b.WriteString(xmlEscape(m.Memory))
+		b.WriteString("</memory>\n")
+	}
+	b.WriteString("</memories>")
+	return b.String()
+}
+
+// xmlEscape escapes s using XML text encoding. Safe for both element
+// text content and double-quoted attribute values.
+func xmlEscape(s string) string {
+	var buf bytes.Buffer
+	_ = xml.EscapeText(&buf, []byte(s))
+	return buf.String()
 }
diff --git a/pkg/runtime/inject_memories_bm25.go b/pkg/runtime/inject_memories_bm25.go
new file mode 100644
index 000000000..06e390b4c
--- /dev/null
+++ b/pkg/runtime/inject_memories_bm25.go
@@ -0,0 +1,147 @@
+package runtime
+
+import (
+	"math"
+	"sort"
+	"strings"
+
+	"github.com/docker/docker-agent/pkg/memory/database"
+)
+
+// BM25 parameters. These match the defaults used throughout
+// pkg/rag/strategy/bm25.go so tokenisation and scoring are consistent
+// across the codebase.
+//
+// TODO: consider extracting a shared tokenizer with pkg/rag/strategy/bm25.go.
+const (
+	bm25K1 = 1.5
+	bm25B  = 0.75
+)
+
+// bm25Replacer strips common punctuation before tokenisation.
+// Kept as a package-level value so it is built once.
+var bm25Replacer = strings.NewReplacer(
+	".", " ", ",", " ", "!", " ", "?", " ",
+	";", " ", ":", " ", "(", " ", ")", " ",
+	"[", " ", "]", " ", "{", " ", "}", " ",
+	"\"", " ", "'", " ", "\n", " ", "\t", " ",
+)
+
+// bm25Stopwords is the same set used in pkg/rag/strategy/bm25.go.
+var bm25Stopwords = map[string]bool{
+	"the": true, "a": true, "an": true, "and": true, "or": true,
+	"but": true, "in": true, "on": true, "at": true, "to": true,
+	"for": true, "of": true, "as": true, "by": true, "is": true,
+	"was": true, "are": true, "were": true, "be": true, "been": true,
+}
+
+// bm25Tokenize turns text into BM25 terms: it lowercases the input, replaces
+// punctuation with spaces, splits on whitespace, and keeps only tokens that
+// are longer than two bytes and not stopwords. Copied from
+// pkg/rag/strategy/bm25.go so query and memory scoring are consistent.
+func bm25Tokenize(text string) []string {
+	fields := strings.Fields(bm25Replacer.Replace(strings.ToLower(text)))
+	terms := make([]string, 0, len(fields))
+	for _, f := range fields {
+		if len(f) <= 2 || bm25Stopwords[f] {
+			continue
+		}
+		terms = append(terms, f)
+	}
+	return terms
+}
+
+type scoredMemory struct {
+	memory database.UserMemory
+	score  float64
+}
+
+// bm25Rank ranks memories against query using BM25 (k1=1.5, b=0.75) and
+// returns at most limit of them in descending score order; memories with
+// equal scores keep their input order. Memories whose score is <= 0 are
+// excluded. Returns nil when limit <= 0, memories is empty, or query
+// contains no valid terms.
+func bm25Rank(memories []database.UserMemory, query string, limit int) []database.UserMemory {
+	if limit <= 0 || len(memories) == 0 {
+		return nil
+	}
+	queryTerms := bm25Tokenize(query)
+	if len(queryTerms) == 0 {
+		return nil
+	}
+
+	// Pre-tokenize all documents and build term-frequency maps.
+	type docInfo struct {
+		tf  map[string]int
+		len float64
+	}
+	docs := make([]docInfo, len(memories))
+	totalLen := 0.0
+	for i, m := range memories {
+		tokens := bm25Tokenize(m.Memory)
+		tf := make(map[string]int, len(tokens))
+		for _, t := range tokens {
+			tf[t]++
+		}
+		docs[i] = docInfo{tf: tf, len: float64(len(tokens))}
+		totalLen += float64(len(tokens))
+	}
+
+	avgDocLen := totalLen / float64(len(memories))
+	N := float64(len(memories))
+
+	// Document frequency per distinct query term. A term repeated in the
+	// query is counted once; recounting it would inflate df and skew IDF.
+	df := make(map[string]int, len(queryTerms))
+	for _, term := range queryTerms {
+		if _, counted := df[term]; counted {
+			continue
+		}
+		df[term] = 0
+		for _, d := range docs {
+			if d.tf[term] > 0 {
+				df[term]++
+			}
+		}
+	}
+
+	// Score each document.
+	scored := make([]scoredMemory, 0, len(memories))
+	for i, m := range memories {
+		score := 0.0
+		for _, term := range queryTerms {
+			tf := float64(docs[i].tf[term])
+			if tf == 0 {
+				continue
+			}
+			// tf > 0 means this document counts towards df, so termDF >= 1.
+			termDF := float64(df[term])
+			idf := math.Log((N-termDF+0.5)/(termDF+0.5) + 1.0)
+			lengthRatio := 1.0
+			if avgDocLen > 0 {
+				lengthRatio = docs[i].len / avgDocLen
+			}
+			numerator := tf * (bm25K1 + 1.0)
+			denominator := tf + bm25K1*(1.0-bm25B+bm25B*lengthRatio)
+			score += idf * (numerator / denominator)
+		}
+		// Normalise to 0-1 for consistency with the vector similarity
+		// scores used elsewhere in the rag package.
+		score = math.Min(score/float64(len(queryTerms)), 1.0)
+		if score > 0 {
+			scored = append(scored, scoredMemory{memory: m, score: score})
+		}
+	}
+
+	// Sort descending by score (stable, so ties keep input order) and cap.
+	sort.SliceStable(scored, func(i, j int) bool {
+		return scored[i].score > scored[j].score
+	})
+	scored = scored[:min(limit, len(scored))]
+
+	out := make([]database.UserMemory, len(scored))
+	for i, s := range scored {
+		out[i] = s.memory
+	}
+	return out
+}
diff --git a/pkg/runtime/inject_memories_bm25_test.go b/pkg/runtime/inject_memories_bm25_test.go
new file mode 100644
index 000000000..c9a11da45
--- /dev/null
+++ b/pkg/runtime/inject_memories_bm25_test.go
@@ -0,0 +1,134 @@
+package runtime
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/memory/database"
+)
+
+func TestBM25Tokenize(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name  string
+		input string
+		want  []string
+	}{
+		{
+			name:  "lowercases and strips punctuation",
+			input: "Go, modules! Are fun.",
+			want:  []string{"modules", "fun"},
+		},
+		{
+			name:  "removes stopwords",
+			input: "the quick brown fox",
+			want:  []string{"quick", "brown", "fox"},
+		},
+		{
+			name:  "drops tokens shorter than three chars",
+			input: "Go is ok",
+			want:  []string{},
+		},
+		{
+			name:  "empty input",
+			input: "",
+			want:  []string{},
+		},
+		{
+			name:  "all stopwords",
+			input: "the a an and or but",
+			want:  []string{},
+		},
+		{
+			name:  "normal sentence",
+			input: "user prefers dark mode",
+			want:  []string{"user", "prefers", "dark", "mode"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := bm25Tokenize(tt.input)
+			if len(tt.want) == 0 {
+				assert.Empty(t, got)
+			} else {
+				assert.Equal(t, tt.want, got)
+			}
+		})
+	}
+}
+
+func TestBM25Rank(t *testing.T) {
+	t.Parallel()
+
+	memories := []database.UserMemory{
+		{Memory: "I love Go"},
+		{Memory: "Python is great"},
+		{Memory: "Go modules are fun"},
+	}
+
+	t.Run("best match for go modules query comes first", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(memories, "go modules", 3)
+		require.NotEmpty(t, result)
+		assert.Equal(t, "Go modules are fun", result[0].Memory)
+	})
+
+	t.Run("limit caps results when several memories match", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(memories, "python modules", 1)
+		assert.Len(t, result, 1)
+	})
+
+	t.Run("empty query returns nil", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(memories, "", 5)
+		assert.Nil(t, result)
+	})
+
+	t.Run("all-stopword query returns nil", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(memories, "the a an and or", 5)
+		assert.Nil(t, result)
+	})
+
+	t.Run("empty memories returns nil", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(nil, "go modules", 5)
+		assert.Nil(t, result)
+	})
+
+	t.Run("zero limit returns nil", func(t *testing.T) {
+		t.Parallel()
+		result := bm25Rank(memories, "go", 0)
+		assert.Nil(t, result)
+	})
+
+	t.Run("unrelated query excludes all", func(t *testing.T) {
+		t.Parallel()
+		// "zzzyyyxxx" doesn't match any token in the memory list.
+		result := bm25Rank(memories, "zzzyyyxxx", 5)
+		assert.Empty(t, result)
+	})
+
+	t.Run("results in descending score order", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{
+			{Memory: "Go language features design patterns"},
+			{Memory: "Go language"},
+			{Memory: "Python language features"},
+		}
+		// "go" is dropped by the tokeniser (two chars), so the ranking is
+		// driven by "language" and "features" plus length normalisation.
+		result := bm25Rank(mems, "go language features", 3)
+		require.Len(t, result, 3)
+		assert.Equal(t, "Python language features", result[0].Memory)
+		assert.Equal(t, "Go language features design patterns", result[1].Memory)
+		assert.Equal(t, "Go language", result[2].Memory)
+		assert.Equal(t, result[:2], bm25Rank(mems, "go language features", 2))
+	})
+}
diff --git a/pkg/runtime/inject_memories_test.go b/pkg/runtime/inject_memories_test.go
index fc73d8919..53ecc5fe1 100644
--- a/pkg/runtime/inject_memories_test.go
+++ b/pkg/runtime/inject_memories_test.go
@@ -2,6 +2,8 @@ package runtime
 
 import (
 	"context"
+	"errors"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -10,18 +12,65 @@ import (
 	"github.com/docker/docker-agent/pkg/agent"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/hooks"
+	"github.com/docker/docker-agent/pkg/memory/database"
+	"github.com/docker/docker-agent/pkg/team"
+	memtool "github.com/docker/docker-agent/pkg/tools/builtin/memory"
 )
 
-// TestInjectMemoriesBuiltin_ReturnsNilWhenNoopStub verifies that the scaffold
-// implementation is a no-op: it returns (nil, nil) so the hook pipeline
-// treats it as contributing no additional context.
-func TestInjectMemoriesBuiltin_ReturnsNilWhenNoopStub(t *testing.T) {
+// buildMinimalRuntime constructs a LocalRuntime containing only the given
+// agent, with no model provider or session compaction. Suitable for unit
+// tests that call hooks directly without running the full run-loop.
+func buildMinimalRuntime(t *testing.T, a *agent.Agent) *LocalRuntime {
+	t.Helper()
+	tm := team.New(team.WithAgents(a))
+	rt, err := NewLocalRuntime(tm, WithSessionCompaction(false), WithModelStore(mockModelStore{}))
+	require.NoError(t, err)
+	return rt
+}
+
+// fakeMemDB is a minimal in-memory DB for testing.
+type fakeMemDB struct {
+	memories       []database.UserMemory
+	getMemoriesErr error
+	searchErr      error
+	getCalls       int
+}
+
+func (f *fakeMemDB) GetMemories(_ context.Context) ([]database.UserMemory, error) {
+	f.getCalls++
+	return f.memories, f.getMemoriesErr
+}
+
+func (f *fakeMemDB) AddMemory(_ context.Context, _ database.UserMemory) error    { return nil }
+func (f *fakeMemDB) DeleteMemory(_ context.Context, _ database.UserMemory) error { return nil }
+func (f *fakeMemDB) UpdateMemory(_ context.Context, _ database.UserMemory) error { return nil }
+func (f *fakeMemDB) SearchMemories(_ context.Context, _, _ string) ([]database.UserMemory, error) {
+	return nil, f.searchErr
+}
+
+// TestInjectMemoriesBuiltin_NilInputGuards verifies that the builtin is a no-op
+// for inputs that carry no useful query.
+func TestInjectMemoriesBuiltin_NilInputGuards(t *testing.T) {
 	t.Parallel()
 
 	rt := &LocalRuntime{}
-	out, err := rt.injectMemoriesBuiltin(context.Background(), &hooks.Input{AgentName: "a"}, nil)
-	require.NoError(t, err)
-	assert.Nil(t, out)
+
+	tests := []struct {
+		name string
+		in   *hooks.Input
+	}{
+		{"nil input", nil},
+		{"empty AgentName", &hooks.Input{LastUserMessage: "hello"}},
+		{"empty LastUserMessage", &hooks.Input{AgentName: "agent"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			out, err := rt.injectMemoriesBuiltin(t.Context(), tt.in, nil)
+			require.NoError(t, err)
+			assert.Nil(t, out)
+		})
+	}
 }
 
 // TestApplyInjectMemoriesDefault verifies that the turn_start hook entry is
@@ -38,7 +87,7 @@ func TestApplyInjectMemoriesDefault(t *testing.T) {
 
 	t.Run("enabled appends hook to nil cfg", func(t *testing.T) {
 		t.Parallel()
-		a := agent.New("a", "", agent.WithInjectMemories(true, 5))
+		a := agent.New("a", "", agent.WithInjectMemories(true, 5, ""))
 		got := applyInjectMemoriesDefault(nil, a)
 		require.NotNil(t, got)
 		require.Len(t, got.TurnStart, 1)
@@ -48,7 +97,7 @@ func TestApplyInjectMemoriesDefault(t *testing.T) {
 
 	t.Run("enabled appends hook to existing cfg", func(t *testing.T) {
 		t.Parallel()
-		a := agent.New("a", "", agent.WithInjectMemories(true, 0))
+		a := agent.New("a", "", agent.WithInjectMemories(true, 0, ""))
 		existing := &hooks.Config{
 			TurnStart: []hooks.Hook{{Type: hooks.HookTypeBuiltin, Command: "add_date"}},
 		}
@@ -74,8 +123,181 @@ func TestEffectiveMaxInjectMemories(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			a := agent.New("a", "", agent.WithInjectMemories(true, tt.max))
+			a := agent.New("a", "", agent.WithInjectMemories(true, tt.max, ""))
 			assert.Equal(t, tt.want, effectiveMaxInjectMemories(a))
 		})
 	}
 }
+
+// TestFormatMemoriesXML verifies XML output structure and escaping.
+func TestFormatMemoriesXML(t *testing.T) {
+	t.Parallel()
+
+	t.Run("single memory no category", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{{Memory: "User prefers dark mode"}}
+		out := formatMemoriesXML(mems)
+		assert.Contains(t, out, "<memories>")
+		assert.Contains(t, out, "</memories>")
+		assert.Contains(t, out, "<memory>User prefers dark mode</memory>")
+	})
+
+	t.Run("memory with category", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{{Memory: "Favourite editor is Vim", Category: "preference"}}
+		out := formatMemoriesXML(mems)
+		assert.Contains(t, out, `category="preference"`)
+		assert.Contains(t, out, "Favourite editor is Vim")
+	})
+
+	t.Run("XML special chars escaped in content", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{{Memory: "User said <hello> & \"goodbye\""}}
+		out := formatMemoriesXML(mems)
+		assert.NotContains(t, out, "<hello>")
+		assert.Contains(t, out, "&lt;hello&gt;")
+		assert.Contains(t, out, "User said &lt;hello&gt; &amp; &#34;goodbye&#34;")
+	})
+
+	t.Run("XML special chars escaped in category", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{{Memory: "test", Category: `cat"egory`}}
+		out := formatMemoriesXML(mems)
+		assert.Contains(t, out, `category="cat&#34;egory"`)
+		assert.NotContains(t, out, `cat"egory`)
+	})
+}
+
+// buildAgentWithMemoryToolset creates an agent with an in-memory fake DB,
+// wired through a real memory.ToolSet so lookupMemoryDB can find it.
+func buildAgentWithMemoryToolset(t *testing.T, db memtool.DB) *agent.Agent {
+	t.Helper()
+	ts := memtool.New(db)
+	return agent.New("testAgent", "",
+		agent.WithModel(&mockProvider{id: "test/mock"}),
+		agent.WithToolSets(ts),
+		agent.WithInjectMemories(true, 3, latest.InjectMemoriesStrategyLocal),
+	)
+}
+
+// TestInjectMemoriesBuiltin_LocalStrategy exercises the full retrieval path
+// through injectMemoriesBuiltin with a fake memory DB.
+func TestInjectMemoriesBuiltin_LocalStrategy(t *testing.T) {
+	t.Parallel()
+
+	t.Run("ranked hits produce XML output", func(t *testing.T) {
+		t.Parallel()
+		db := &fakeMemDB{memories: []database.UserMemory{
+			{Memory: "I love Go", Category: "preference"},
+			{Memory: "Python is great"},
+			{Memory: "Go modules are fun"},
+		}}
+		a := buildAgentWithMemoryToolset(t, db)
+		rt := buildMinimalRuntime(t, a)
+
+		out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+			AgentName:       "testAgent",
+			LastUserMessage: "go modules",
+		}, nil)
+		require.NoError(t, err)
+		require.NotNil(t, out)
+		require.NotNil(t, out.HookSpecificOutput)
+		ctx := out.HookSpecificOutput.AdditionalContext
+		assert.Contains(t, ctx, "<memories>")
+		// Best match for "go modules" query.
+		assert.Contains(t, ctx, "Go modules are fun")
+	})
+
+	t.Run("empty DB returns nil", func(t *testing.T) {
+		t.Parallel()
+		db := &fakeMemDB{}
+		a := buildAgentWithMemoryToolset(t, db)
+		rt := buildMinimalRuntime(t, a)
+
+		out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+			AgentName:       "testAgent",
+			LastUserMessage: "anything",
+		}, nil)
+		require.NoError(t, err)
+		assert.Nil(t, out)
+	})
+
+	t.Run("no matching memories returns nil", func(t *testing.T) {
+		t.Parallel()
+		db := &fakeMemDB{memories: []database.UserMemory{
+			// Only stopwords — tokeniser will produce no terms.
+			{Memory: "the a an and or"},
+		}}
+		a := buildAgentWithMemoryToolset(t, db)
+		rt := buildMinimalRuntime(t, a)
+
+		out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+			AgentName:       "testAgent",
+			LastUserMessage: "the a an",
+		}, nil)
+		require.NoError(t, err)
+		assert.Nil(t, out)
+	})
+
+	t.Run("GetMemories error returns nil without bubbling", func(t *testing.T) {
+		t.Parallel()
+		db := &fakeMemDB{getMemoriesErr: errors.New("disk error")}
+		a := buildAgentWithMemoryToolset(t, db)
+		rt := buildMinimalRuntime(t, a)
+
+		out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+			AgentName:       "testAgent",
+			LastUserMessage: "hello",
+		}, nil)
+		require.NoError(t, err)
+		assert.Nil(t, out)
+	})
+
+	t.Run("max inject memories respected", func(t *testing.T) {
+		t.Parallel()
+		mems := []database.UserMemory{
+			{Memory: "Go language features"},
+			{Memory: "Go concurrency patterns"},
+			{Memory: "Go interface design"},
+			{Memory: "Go generics syntax"},
+			{Memory: "Go toolchain usage"},
+		}
+		db := &fakeMemDB{memories: mems}
+		// Limit to 2.
+		a := agent.New("testAgent", "",
+			agent.WithModel(&mockProvider{id: "test/mock"}),
+			agent.WithToolSets(memtool.New(db)),
+			agent.WithInjectMemories(true, 2, latest.InjectMemoriesStrategyLocal),
+		)
+		rt := buildMinimalRuntime(t, a)
+
+		out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+			AgentName:       "testAgent",
+			LastUserMessage: "Go language",
+		}, nil)
+		require.NoError(t, err)
+		require.NotNil(t, out)
+		// Count <memory> tags — should be at most 2.
+		count := strings.Count(out.HookSpecificOutput.AdditionalContext, "<memory")
+		assert.LessOrEqual(t, count, 2)
+	})
+}
+
+// TestInjectMemoriesBuiltin_UnknownStrategy verifies graceful degradation.
+func TestInjectMemoriesBuiltin_UnknownStrategy(t *testing.T) {
+	t.Parallel()
+	db := &fakeMemDB{memories: []database.UserMemory{{Memory: "something relevant"}}}
+	a := agent.New("testAgent", "",
+		agent.WithModel(&mockProvider{id: "test/mock"}),
+		agent.WithToolSets(memtool.New(db)),
+		agent.WithInjectMemories(true, 5, "bogus"),
+	)
+	rt := buildMinimalRuntime(t, a)
+
+	out, err := rt.injectMemoriesBuiltin(t.Context(), &hooks.Input{
+		AgentName:       "testAgent",
+		LastUserMessage: "something",
+	}, nil)
+	require.NoError(t, err)
+	assert.Nil(t, out)
+}
diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go
index fcb22a215..2fdd9f19d 100644
--- a/pkg/teamloader/teamloader.go
+++ b/pkg/teamloader/teamloader.go
@@ -179,6 +179,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c
 			agent.WithInjectMemories(
 				agentConfig.InjectMemories,
 				agentConfig.MaxInjectMemories,
+				agentConfig.InjectMemoriesStrategy,
 			),
 			agent.WithMaxIterations(agentConfig.MaxIterations),
 			agent.WithMaxConsecutiveToolCalls(agentConfig.MaxConsecutiveToolCalls),
diff --git a/pkg/tools/builtin/memory/memory.go b/pkg/tools/builtin/memory/memory.go
index 1e200346e..595eee5c7 100644
--- a/pkg/tools/builtin/memory/memory.go
+++ b/pkg/tools/builtin/memory/memory.go
@@ -90,6 +90,16 @@ func NewWithPath(manager DB, dbPath string) *ToolSet {
 	}
 }
 
+// DB returns the underlying memory database. Used by the runtime's
+// inject_memories turn_start builtin to bypass the tool-call layer.
+func (t *ToolSet) DB() DB { return t.db }
+
+// SetDB replaces the underlying database. lookupMemoryDB calls it once per
+// agent on first turn_start to interpose an invalidatingDB wrapper, so tool
+// writes also bump the snapshot generation counter. The assignment is not
+// synchronised; NOTE(review): confirm no tool call can race with it.
+func (t *ToolSet) SetDB(db DB) { t.db = db }
+
 // Describe returns a short, user-visible description of this toolset instance.
 func (t *ToolSet) Describe() string {
 	if t.path != "" {

From b82bd8f43ba7c1a8cc84ab81606ea55c86356758 Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:46:56 +0000
Subject: [PATCH 5/6] feat(memory): frozen snapshot and cache invalidation
 (#3017)

- Add memorySnapshotCache (RWMutex + double-check) to avoid SQLite hit per turn
- Add invalidatingDB wrapper that bumps atomic generation counter on writes
- Wire memSnapshots and memDBs/memDBsMu onto LocalRuntime struct
- Initialise memSnapshots in NewLocalRuntime
- Update lookupMemoryDB to memoize invalidatingDB wrapper; call mt.SetDB so
  writes through agent memory tools (add_memory etc.) also bump the counter
- injectMemoriesBuiltin uses snapshot cache for local strategy
- Tests: cache hit, invalidation on add/update/delete, concurrent reads,
  failed-write does not advance generation, atomic gen counter
---
 pkg/runtime/inject_memories.go               |  49 ++++-
 pkg/runtime/inject_memories_snapshot.go      | 114 ++++++++++
 pkg/runtime/inject_memories_snapshot_test.go | 206 +++++++++++++++++++
 pkg/runtime/runtime.go                       |  15 ++
 4 files changed, 374 insertions(+), 10 deletions(-)
 create mode 100644 pkg/runtime/inject_memories_snapshot.go
 create mode 100644 pkg/runtime/inject_memories_snapshot_test.go

diff --git a/pkg/runtime/inject_memories.go b/pkg/runtime/inject_memories.go
index 23de92496..b3ebfceb7 100644
--- a/pkg/runtime/inject_memories.go
+++ b/pkg/runtime/inject_memories.go
@@ -62,7 +62,10 @@ func effectiveMaxInjectMemories(a *agent.Agent) int {
 //
 // Only the "local" strategy is implemented. It ranks all stored memories
 // with an in-process BM25 scorer against the last user message — cheap,
-// deterministic, and requires no extra model call.
+// deterministic, and requires no extra model call. The local strategy uses
+// memorySnapshotCache to avoid a SQLite round-trip on every turn; the
+// snapshot is invalidated whenever a memory write occurs via the agent's
+// own memory tools (add_memory, update_memory, delete_memory).
 //
 // No-op when:
 //   - input is missing or AgentName/LastUserMessage are empty;
@@ -97,7 +100,12 @@ func (r *LocalRuntime) injectMemoriesBuiltin(ctx context.Context, in *hooks.Inpu
 	var hits []database.UserMemory
 	switch strategy {
 	case latest.InjectMemoriesStrategyLocal:
-		all, err := db.GetMemories(ctx)
+		var all []database.UserMemory
+		if r.memSnapshots != nil {
+			all, err = r.memSnapshots.get(ctx, a.Name(), db)
+		} else {
+			all, err = db.GetMemories(ctx)
+		}
 		if err != nil {
 			slog.WarnContext(ctx, "inject_memories: GetMemories failed",
 				"agent", a.Name(), "error", err)
@@ -119,20 +127,41 @@ func (r *LocalRuntime) injectMemoriesBuiltin(ctx context.Context, in *hooks.Inpu
 	return hooks.NewAdditionalContextOutput(hooks.EventTurnStart, formatMemoriesXML(hits)), nil
 }
 
-// lookupMemoryDB returns the memory DB for the agent's first memory
-// toolset, or (nil, false) when the agent has no such toolset.
+// lookupMemoryDB returns the invalidatingDB wrapper for the agent's first
+// memory toolset. The wrapper is memoised in r.memDBs so the same instance
+// (and its generation counter) is reused across turns.
 //
-// The first match among the agent's toolsets is used; if an agent has
-// multiple memory toolsets, only the first is used for injection — matching
-// the existing tools.As behaviour throughout the codebase.
+// First-call side effects:
+//   - Wraps the raw DB in an invalidatingDB that bumps a generation counter
+//     on every write.
+//   - Calls mt.SetDB(wrapped) so writes through the agent's own memory tools
+//     (add_memory, update_memory, delete_memory) also advance the counter and
+//     trigger snapshot invalidation on the next turn.
 //
 // Note: we do not call ToolSet.Start() here. The memory toolset's DB is
 // opened eagerly inside CreateToolSet (the sqlite.NewMemoryDatabase call),
-// so GetMemories is safe to call before Start() is invoked.
-func (r *LocalRuntime) lookupMemoryDB(a *agent.Agent) (memory.DB, bool) {
+// so GetMemories is safe to call before Start() is invoked. This is an
+// implicit contract documented here: if Start() ever becomes load-bearing,
+// the runtime's ensureToolSetsAreStarted path (called during model invocation
+// on each turn) will cover it before the hook runs.
+func (r *LocalRuntime) lookupMemoryDB(a *agent.Agent) (*invalidatingDB, bool) {
+	name := a.Name()
+
+	r.memDBsMu.Lock()
+	defer r.memDBsMu.Unlock()
+	if r.memDBs == nil {
+		r.memDBs = make(map[string]*invalidatingDB)
+	}
+	if db, ok := r.memDBs[name]; ok {
+		return db, true
+	}
+
 	for _, ts := range a.ToolSets() {
 		if mt, ok := tools.As[*memory.ToolSet](ts); ok {
-			return mt.DB(), true
+			wrapped := newInvalidatingDB(mt.DB())
+			mt.SetDB(wrapped)
+			r.memDBs[name] = wrapped
+			return wrapped, true
 		}
 	}
 	return nil, false
diff --git a/pkg/runtime/inject_memories_snapshot.go b/pkg/runtime/inject_memories_snapshot.go
new file mode 100644
index 000000000..4fc379d70
--- /dev/null
+++ b/pkg/runtime/inject_memories_snapshot.go
@@ -0,0 +1,114 @@
+package runtime
+
+import (
+	"context"
+	"sync"
+	"sync/atomic"
+
+	"github.com/docker/docker-agent/pkg/memory/database"
+	memory "github.com/docker/docker-agent/pkg/tools/builtin/memory"
+)
+
+// memorySnapshotCache holds frozen copies of each agent's memory list,
+// keyed by agent name. The cache is invalidated by bumping a per-agent
+// generation counter via invalidatingDB; subsequent reads detect the bump
+// and refresh from the underlying DB.
+//
+// Concurrency model: the cache is read-mostly (one read per turn start),
+// so an RWMutex with a double-checked write path is used. A narrow race
+// between a concurrent AddMemory and a get() is safe: at worst an extra
+// GetMemories call is issued — the double-check inside the write lock
+// prevents duplicate refreshes.
+type memorySnapshotCache struct {
+	mu      sync.RWMutex
+	entries map[string]*memorySnapshotEntry
+}
+
+type memorySnapshotEntry struct {
+	// gen is the generation at which snap was taken. If the DB's
+	// generation counter has advanced, snap is stale and must be
+	// refreshed.
+	gen  uint64
+	snap []database.UserMemory
+}
+
+func newMemorySnapshotCache() *memorySnapshotCache {
+	return &memorySnapshotCache{entries: make(map[string]*memorySnapshotEntry)}
+}
+
+// get returns the current snapshot for agentName, refreshing from db when
+// the cached entry is missing or stale (db's generation has advanced). The
+// returned slice is shared with the cache; callers must not modify it.
+func (c *memorySnapshotCache) get(ctx context.Context, agentName string, db *invalidatingDB) ([]database.UserMemory, error) {
+	currentGen := db.gen()
+
+	c.mu.RLock()
+	e := c.entries[agentName]
+	c.mu.RUnlock()
+	if e != nil && e.gen == currentGen {
+		return e.snap, nil
+	}
+
+	// Refresh under the write lock, re-checking in case another caller won.
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	e = c.entries[agentName]
+	if e != nil && e.gen == currentGen {
+		return e.snap, nil
+	}
+
+	fresh, err := db.GetMemories(ctx)
+	if err != nil {
+		return nil, err
+	}
+	c.entries[agentName] = &memorySnapshotEntry{gen: currentGen, snap: fresh}
+	return fresh, nil
+}
+
+// invalidatingDB wraps a memory.DB and bumps an atomic generation counter on
+// any write (AddMemory, UpdateMemory, DeleteMemory). The runtime installs
+// this wrapper once per agent (via lookupMemoryDB → mt.SetDB(wrapped)), so
+// writes that go through the agent's own memory tools also advance the
+// counter and trigger snapshot invalidation on the next turn.
+//
+// Note: this only covers writes that go through the wrapped instance's
+// methods. External writes (e.g. direct SQLite file manipulation) will not
+// advance the counter; the snapshot may go stale in that scenario. Direct
+// SQLite access is not a supported runtime operation and is documented here
+// as the only known counter-invalidation gap.
+type invalidatingDB struct {
+	memory.DB
+
+	genVal atomic.Uint64
+}
+
+func newInvalidatingDB(db memory.DB) *invalidatingDB {
+	return &invalidatingDB{DB: db}
+}
+
+func (d *invalidatingDB) gen() uint64 { return d.genVal.Load() }
+func (d *invalidatingDB) bump()       { d.genVal.Add(1) }
+
+func (d *invalidatingDB) AddMemory(ctx context.Context, m database.UserMemory) error {
+	if err := d.DB.AddMemory(ctx, m); err != nil {
+		return err
+	}
+	d.bump()
+	return nil
+}
+
+func (d *invalidatingDB) UpdateMemory(ctx context.Context, m database.UserMemory) error {
+	if err := d.DB.UpdateMemory(ctx, m); err != nil {
+		return err
+	}
+	d.bump()
+	return nil
+}
+
+func (d *invalidatingDB) DeleteMemory(ctx context.Context, m database.UserMemory) error {
+	if err := d.DB.DeleteMemory(ctx, m); err != nil {
+		return err
+	}
+	d.bump()
+	return nil
+}
diff --git a/pkg/runtime/inject_memories_snapshot_test.go b/pkg/runtime/inject_memories_snapshot_test.go
new file mode 100644
index 000000000..9cd49391a
--- /dev/null
+++ b/pkg/runtime/inject_memories_snapshot_test.go
@@ -0,0 +1,206 @@
+package runtime
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"sync/atomic"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/memory/database"
+)
+
+// countingDB counts calls to GetMemories for cache-hit verification.
+type countingDB struct {
+	fakeMemDB
+
+	mu       sync.Mutex
+	getCalls int
+}
+
+func (c *countingDB) GetMemories(ctx context.Context) ([]database.UserMemory, error) {
+	c.mu.Lock()
+	c.getCalls++
+	c.mu.Unlock()
+	return c.fakeMemDB.GetMemories(ctx)
+}
+
+func (c *countingDB) calls() int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.getCalls
+}
+
+// TestSnapshot_CachesAcrossCalls verifies that two consecutive get() calls
+// with the same generation only issue one GetMemories call.
+func TestSnapshot_CachesAcrossCalls(t *testing.T) {
+	t.Parallel()
+
+	raw := &countingDB{fakeMemDB: fakeMemDB{memories: []database.UserMemory{
+		{Memory: "cached memory"},
+	}}}
+	wrapped := newInvalidatingDB(raw)
+	cache := newMemorySnapshotCache()
+
+	m1, err := cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	require.Len(t, m1, 1)
+
+	m2, err := cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	require.Len(t, m2, 1)
+
+	assert.Equal(t, 1, raw.calls(), "expected exactly one GetMemories call for two reads at same generation")
+}
+
+// TestSnapshot_InvalidatesOnAdd verifies that AddMemory bumps the generation
+// and causes the next get() to re-fetch from the DB.
+func TestSnapshot_InvalidatesOnAdd(t *testing.T) {
+	t.Parallel()
+
+	raw := &countingDB{fakeMemDB: fakeMemDB{memories: []database.UserMemory{
+		{Memory: "original"},
+	}}}
+	wrapped := newInvalidatingDB(raw)
+	cache := newMemorySnapshotCache()
+
+	_, err := cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 1, raw.calls())
+
+	// Simulate add_memory.
+	require.NoError(t, wrapped.AddMemory(t.Context(), database.UserMemory{Memory: "new"}))
+
+	_, err = cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 2, raw.calls(), "expected re-fetch after AddMemory")
+}
+
+// TestSnapshot_InvalidatesOnUpdate verifies UpdateMemory triggers invalidation.
+func TestSnapshot_InvalidatesOnUpdate(t *testing.T) {
+	t.Parallel()
+
+	raw := &countingDB{fakeMemDB: fakeMemDB{memories: []database.UserMemory{{Memory: "v1"}}}}
+	wrapped := newInvalidatingDB(raw)
+	cache := newMemorySnapshotCache()
+
+	_, err := cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 1, raw.calls())
+
+	require.NoError(t, wrapped.UpdateMemory(t.Context(), database.UserMemory{Memory: "v2"}))
+
+	_, err = cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 2, raw.calls(), "expected re-fetch after UpdateMemory")
+}
+
+// TestSnapshot_InvalidatesOnDelete verifies DeleteMemory triggers invalidation.
+func TestSnapshot_InvalidatesOnDelete(t *testing.T) {
+	t.Parallel()
+
+	raw := &countingDB{fakeMemDB: fakeMemDB{memories: []database.UserMemory{{Memory: "to delete"}}}}
+	wrapped := newInvalidatingDB(raw)
+	cache := newMemorySnapshotCache()
+
+	_, err := cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 1, raw.calls())
+
+	require.NoError(t, wrapped.DeleteMemory(t.Context(), database.UserMemory{Memory: "to delete"}))
+
+	_, err = cache.get(t.Context(), "agent", wrapped)
+	require.NoError(t, err)
+	assert.Equal(t, 2, raw.calls(), "expected re-fetch after DeleteMemory")
+}
+
+// TestSnapshot_ConcurrentReads verifies no race conditions under concurrent
+// get() calls. Run with -race to catch data races.
+func TestSnapshot_ConcurrentReads(t *testing.T) {
+	t.Parallel()
+
+	raw := &countingDB{fakeMemDB: fakeMemDB{memories: []database.UserMemory{
+		{Memory: "concurrent memory"},
+	}}}
+	wrapped := newInvalidatingDB(raw)
+	cache := newMemorySnapshotCache()
+
+	const N = 50
+	var wg sync.WaitGroup
+	wg.Add(N)
+	for range N {
+		go func() {
+			defer wg.Done()
+			m, err := cache.get(t.Context(), "agent", wrapped)
+			assert.NoError(t, err)
+			assert.Len(t, m, 1)
+		}()
+	}
+	wg.Wait()
+
+	// The generation never changes during this test, so exactly one
+	// goroutine should miss the cache: get() re-checks the entry under the
+	// write lock before calling GetMemories, and every other caller finds
+	// the entry stored by the winner. More than one call means the
+	// double-check is broken.
+	assert.Equal(t, 1, raw.calls())
+}
+
+// TestInvalidatingDB_FailedWriteDoesNotBump verifies that if the underlying
+// write fails, the generation is NOT advanced (snapshot remains valid).
+func TestInvalidatingDB_FailedWriteDoesNotBump(t *testing.T) {
+	t.Parallel()
+
+	writeErr := errors.New("write failed")
+	raw := &fakeMemDB{getMemoriesErr: nil}
+	raw.memories = []database.UserMemory{{Memory: "existing"}}
+
+	// Stub out errors for write ops.
+	failDB := &writeFailing{fakeMemDB: raw, err: writeErr}
+	wrapped := newInvalidatingDB(failDB)
+
+	genBefore := wrapped.gen()
+
+	err := wrapped.AddMemory(t.Context(), database.UserMemory{Memory: "new"})
+	require.Error(t, err)
+	assert.Equal(t, genBefore, wrapped.gen(), "generation must not advance on failed write")
+}
+
+// TestInvalidatingDB_AtomicGen verifies gen() / bump() are race-free.
+func TestInvalidatingDB_AtomicGen(t *testing.T) {
+	t.Parallel()
+
+	raw := &fakeMemDB{}
+	wrapped := newInvalidatingDB(raw)
+
+	var ops atomic.Int32
+	const N = 200
+	var wg sync.WaitGroup
+	wg.Add(N)
+	for range N {
+		go func() {
+			defer wg.Done()
+			wrapped.bump()
+			ops.Add(1)
+		}()
+	}
+	wg.Wait()
+	assert.Equal(t, uint64(N), wrapped.gen())
+}
+
+// writeFailing wraps a fakeMemDB and returns an error for all write operations.
+type writeFailing struct {
+	*fakeMemDB
+
+	err error
+}
+
+func (w *writeFailing) AddMemory(_ context.Context, _ database.UserMemory) error    { return w.err }
+func (w *writeFailing) UpdateMemory(_ context.Context, _ database.UserMemory) error { return w.err }
+func (w *writeFailing) DeleteMemory(_ context.Context, _ database.UserMemory) error { return w.err }
+func (w *writeFailing) SearchMemories(_ context.Context, _, _ string) ([]database.UserMemory, error) {
+	return nil, nil
+}
diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go
index 45d64122f..4b2f4eed5 100644
--- a/pkg/runtime/runtime.go
+++ b/pkg/runtime/runtime.go
@@ -215,6 +215,18 @@ type LocalRuntime struct {
 	// construction, so no locking is needed.
 	hooksExecByAgent map[string]*hooks.Executor
 
+	// memSnapshots is the per-agent frozen memory snapshot cache used by
+	// the inject_memories turn_start builtin (local strategy). It is
+	// initialised once in [NewLocalRuntime] and never replaced.
+	memSnapshots *memorySnapshotCache
+	// memDBsMu guards memDBs. lookupMemoryDB is called from turn_start
+	// (potentially concurrently for multi-agent runtimes) so a mutex is
+	// needed even though writes only happen once per agent.
+	memDBsMu sync.Mutex
+	// memDBs memoises the invalidatingDB wrapper per agent so the same
+	// generation counter instance is reused across turns.
+	memDBs map[string]*invalidatingDB
+
 	// transforms is the runtime's [MessageTransform] chain, applied to
 	// every LLM call in registration order. Populated by
 	// [NewLocalRuntime] (for the runtime-shipped strip transform) and by
@@ -536,6 +548,9 @@ func NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) {
 	if r.hooksRegistry == nil {
 		r.hooksRegistry = hooks.NewRegistry()
 	}
+	if r.memSnapshots == nil {
+		r.memSnapshots = newMemorySnapshotCache()
+	}
 	if err := builtins.Register(r.hooksRegistry); err != nil {
 		return nil, fmt.Errorf("register builtin hooks: %w", err)
 	}

From d7e6892a469c73e78238842615a4d2c3e41477c0 Mon Sep 17 00:00:00 2001
From: hjeddad <hamza-jeddad@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:52:28 +0000
Subject: [PATCH 6/6] feat(memory): tests, docs, and example for
 inject_memories (#3018)

- Add examples/inject_memories.yaml (validated by TestJsonSchemaWorksForExamples)
- Add Inject Memories section to docs/configuration/agents/index.md:
  config table, strategies, FAQ (transient injection, snapshot invalidation)
- Add three new fields to agent schema table in agents/index.md
- Append 'See also' link from docs/tools/memory/index.md
- Update AGENTS.md: note inject_memories as runtime-private auto-injected hook
---
 AGENTS.md                          |  1 +
 docs/configuration/agents/index.md | 44 ++++++++++++++++++++++++++++++
 docs/tools/memory/index.md         |  4 +++
 examples/inject_memories.yaml      | 28 +++++++++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 examples/inject_memories.yaml

diff --git a/AGENTS.md b/AGENTS.md
index 894cfa91f..404ec27fc 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -98,6 +98,7 @@ Before marking work as complete:
 - When adding new features to the config:
   - Update `./agent-schema.json` accordingly
   - Create an example YAML that demonstrates the new feature
+- Some config flags (`inject_memories`, `cache_response`) are wired by the runtime as auto-injected hooks rather than through `pkg/hooks/builtins`. Look in `pkg/runtime/inject_memories.go` and `pkg/runtime/cache.go` for examples.
 
 # Git Practices
 
diff --git a/docs/configuration/agents/index.md b/docs/configuration/agents/index.md
index 6ec91aff3..e03ff457c 100644
--- a/docs/configuration/agents/index.md
+++ b/docs/configuration/agents/index.md
@@ -26,6 +26,9 @@ agents:
     add_date: boolean # Optional: add date to context
     add_environment_info: boolean # Optional: add env info to context
     add_prompt_files: [list] # Optional: include additional prompt files
+    inject_memories: boolean # Optional: inject relevant memories at turn start
+    max_inject_memories: int # Optional: cap on injected memories (default: 10)
+    inject_memories_strategy: string # Optional: retrieval strategy, "local" (default)
     add_description_parameter: bool # Optional: add description to tool schema
     redact_secrets: boolean # Optional: scrub detected secrets out of tool args, outgoing chat messages, and tool output
     code_mode_tools: boolean # Optional: enable code mode tool format
@@ -85,6 +88,9 @@ agents:
 | `add_date`                  | boolean | ✗        | When `true`, injects the current date into the agent's context.                                                                                                               |
 | `add_environment_info`      | boolean | ✗        | When `true`, injects working directory, OS, CPU architecture, and git info into context.                                                                                      |
 | `add_prompt_files`          | array   | ✗        | List of file paths whose contents are appended to the system prompt. Useful for including coding standards, guidelines, or additional context.                                |
+| `inject_memories`           | boolean | ✗        | When `true`, the runtime fetches the most relevant stored memories at the start of every turn and injects them as a transient system message. Requires a `memory` toolset. See [Inject Memories](#inject-memories) below. |
+| `max_inject_memories`       | int     | ✗        | Maximum number of memories injected per turn. Default: `10`.                                                                                                                  |
+| `inject_memories_strategy`  | string  | ✗        | Retrieval strategy for `inject_memories`. `"local"` (default): in-process BM25 ranker against the user's message — cheap and deterministic. Note: an `"llm"` strategy is planned for a future release. |
 | `add_description_parameter` | boolean | ✗        | When `true`, adds agent descriptions as a parameter in tool schemas. Helps with tool selection in multi-agent scenarios.                                                      |
 | `redact_secrets`            | boolean | ✗        | When `true`, scrubs detected secrets (API keys, tokens, private keys, etc.) out of tool-call arguments, outgoing chat messages, and tool output before they reach a tool, the model, or downstream consumers. See [Redacting Secrets](#redacting-secrets) below.   |
 | `code_mode_tools`           | boolean | ✗        | When `true`, formats tool responses in a code-optimized format with structured output schemas. Useful for MCP gateway and programmatic access.                                |
@@ -151,6 +157,44 @@ Multiple processes can share the same `path:` cache file safely. Every `Store` t
 
 `Lookup` watches the file's modification time and reloads the in-memory map when the file has advanced since its last load, so writes from a sibling process become visible without a restart. The `<path>.lock` sentinel file is created on first write and never deleted: removing it would let two processes lock different inodes and lose mutual exclusion.
 
+## Inject Memories
+
+When `inject_memories: true`, the runtime retrieves the most relevant memories from the agent's memory toolset at the start of every turn and injects them as a transient system message. The injection is **never persisted** to the session transcript — it is invisible in session replays by design, matching the behaviour of `add_date` and `add_environment_info`.
+
+Requires a `memory` toolset on the same agent. If no memory toolset is configured, the runtime emits a warning and the hook is a no-op.
+
+```yaml
+agents:
+  assistant:
+    model: openai/gpt-4o-mini
+    instruction: |
+      You are a helpful assistant that remembers user preferences.
+    toolsets:
+      - type: memory
+    inject_memories: true
+    max_inject_memories: 5
+    inject_memories_strategy: local
+```
+
+### Retrieval strategies
+
+| Strategy | Description |
+| -------- | ----------- |
+| `local` (default) | In-process BM25 ranker scores all stored memories against the user's message. Cheap, deterministic, no extra model call. Uses a per-agent snapshot cache; the cache is invalidated whenever a memory is written via the agent's memory tools. |
+
+> **Note:** An `llm` strategy is planned for a future release.
+
+### FAQ
+
+**Why don't I see the injected memories in my session transcript?**
+Injection is transient by design — matching the behaviour of `add_date` and `add_environment_info`. The memories are visible to the model during the turn but are not saved to the session file.
+
+**When are memories refreshed from disk?**
+The `local` strategy maintains an in-process snapshot cache per agent. The cache is invalidated whenever a memory write occurs through the agent's own memory tools (`add_memory`, `update_memory`, `delete_memory`), and is then reloaded from disk. Each reload issues a single `GetMemories` call to SQLite.
+
+**What if an agent has multiple memory toolsets?**
+Only the first memory toolset found is used for injection. Configure a single `type: memory` toolset per agent for predictable behaviour.
+
 ## Redacting Secrets
 
 The `redact_secrets` flag is a single agent-level switch that scrubs accidentally leaked credentials, tokens, and private keys out of an agent's I/O. It wires up three complementary defenses:
diff --git a/docs/tools/memory/index.md b/docs/tools/memory/index.md
index d5f154338..40967a52d 100644
--- a/docs/tools/memory/index.md
+++ b/docs/tools/memory/index.md
@@ -59,3 +59,7 @@ Memories support an optional `category` field for organization and filtering. Co
 </div>
   <p>Memory is especially useful for long-running assistants that need to recall information across conversations — like coding preferences, project conventions, or context discovered during previous sessions.</p>
 </div>
+
+## See also
+
+- [Inject Memories]({{ '/configuration/agents/#inject-memories' | relative_url }}) — automatically inject relevant memories at the start of each turn without the agent needing to call `get_memories` or `search_memories` explicitly.
diff --git a/examples/inject_memories.yaml b/examples/inject_memories.yaml
new file mode 100644
index 000000000..79961eab7
--- /dev/null
+++ b/examples/inject_memories.yaml
@@ -0,0 +1,28 @@
+# Demonstrates automatic memory injection at the start of every turn.
+#
+# When inject_memories is true, the runtime fetches the most relevant
+# stored memories on each turn and injects them as a transient system
+# message. The injection is never persisted to the session transcript.
+#
+# The "local" strategy (the only one available) uses an in-process BM25
+# ranker to score all stored memories against the user's latest message.
+# Cheap, deterministic, and requires no extra model call. A per-agent
+# snapshot cache avoids a SQLite round-trip on every turn; the cache is
+# invalidated whenever a memory is written through the agent's memory tools.
+#
+# Requires a memory toolset to be configured on the agent.
+
+agents:
+  assistant:
+    model: openai/gpt-4o-mini
+    description: An assistant that remembers what you tell it
+    instruction: |
+      You are a helpful assistant. When the user shares a preference
+      or fact, store it with add_memory. Use stored memories to give
+      consistent, personalised answers.
+    toolsets:
+      - type: memory
+        path: ./inject_memories.db
+    inject_memories: true
+    max_inject_memories: 5
+    inject_memories_strategy: local