From 1be0ff376e9794bfb753a9f019d039c001d10706 Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Thu, 15 Jan 2026 16:24:16 +0100 Subject: [PATCH 1/2] add configurable reasoning preservation for openai completions Standard behavior: discard reasoning before last user message (based on OpenAI SDK). Model-level config enables preservation (e.g. GLM-4.7 with preserved thinking). Changes: - prune-history: add keep-history-reasoning parameter - Tests: cover both pruning and preservation - Docs: add keepHistoryReasoning to model schema --- CHANGELOG.md | 2 + docs/configuration.md | 3 +- docs/models.md | 53 +++++++++++++++------ src/eca/llm_api.clj | 2 + src/eca/llm_providers/openai_chat.clj | 37 +++++++------- test/eca/llm_providers/openai_chat_test.clj | 31 +++++++++--- 6 files changed, 90 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56940783..4ae57c68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- (OpenAI Chat) - Configurable reasoning history via `keepHistoryReasoning` (model-level, default: prune) + ## 0.92.1 - Add `x-llm-application-name: eca` to prompt requests, useful to track and get metrics when using LLM gateways. 
diff --git a/docs/configuration.md b/docs/configuration.md index bf55c215..fdd15611 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -673,7 +673,8 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut thinkTagEnd?: string; models: {[key: string]: { modelName?: string; - extraPayload?: {[key: string]: any} + extraPayload?: {[key: string]: any}; + keepHistoryReasoning?: boolean; }}; }}; defaultModel?: string; diff --git a/docs/models.md b/docs/models.md index cc8da224..242fab44 100644 --- a/docs/models.md +++ b/docs/models.md @@ -61,19 +61,20 @@ You just need to add your provider to `providers` and make sure add the required Schema: -| Option | Type | Description | Required | -|-------------------------------|--------|--------------------------------------------------------------------------------------------------------------|----------| -| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | -| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | -| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}` | No* | -| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | -| `thinkTagStart` | string | Optional override the think start tag tag for openai-chat (Default: "") api | No | -| `thinkTagEnd` | string | Optional override the think end tag for openai-chat (Default: "") api | No | -| `httpClient` | map | Allow customize the http-client for this provider requests, like changing http version | No | -| `models` | map | Key: model name, value: its config | Yes | -| `models extraPayload` | map | Extra payload sent in body to LLM | No | -| `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | -| `fetchModels` | boolean | Enable automatic 
model discovery from `/models` endpoint (OpenAI-compatible providers) | No | +| Option | Type | Description | Required | +|---------------------------------------|---------|--------------------------------------------------------------------------------------------------------------|----------| +| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | +| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | +| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}`) | No* | +| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | +| `thinkTagStart` | string | Optional override for the think start tag of the openai-chat api (Default: `<think>`) | No | +| `thinkTagEnd` | string | Optional override for the think end tag of the openai-chat api (Default: `</think>`) | No | +| `httpClient` | map | Allows customizing the http-client for this provider's requests, like changing the http version | No | +| `models` | map | Key: model name, value: its config | Yes | +| `models extraPayload` | map | Extra payload sent in the body to the LLM | No | +| `models modelName` | string | Override the model name; useful to have multiple models with different configs and names that use the same LLM model | No | +| `models keepHistoryReasoning` | boolean | Keep `reason` messages in conversation history. Default: `false` | No | +| `fetchModels` | boolean | Enable automatic model discovery from the `/models` endpoint (OpenAI-compatible providers) | No | _* url and key will be searched as envs `_API_URL` and `_API_KEY`, they require the env to be found or config to work._ @@ -120,6 +121,30 @@ Examples: This way both will use gpt-5 model but one will override the reasoning to be high instead of the default. +=== "History reasoning" + + `keepHistoryReasoning` preserves reasoning in conversation history.
Set for specific models: + + ```javascript title="~/.config/eca/config.json" + { + "providers": { + "z-ai": { + "api": "openai-chat", + "url": "https://api.z.ai/api/paas/v4/", + "key": "your-api-key", + "models": { + "GLM-4.7": { + "keepHistoryReasoning": true, // Preserves reasoning + "extraPayload": {"clear_thinking": false} // Preserved thinking (see https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking) + } + } + } + } + } + ``` + + Default: `false`. + === "Dynamic model discovery" For OpenAI-compatible providers, set `fetchModels: true` to automatically discover available models: @@ -211,7 +236,7 @@ Notes: 3. Type the chosen method 4. Authenticate in your browser, copy the code. 5. Paste and send the code and done! - + === "Codex / Openai" 1. Login to Openai via the chat command `/login`. diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj index 96536a8d..571b4e33 100644 --- a/src/eca/llm_api.clj +++ b/src/eca/llm_api.clj @@ -206,6 +206,7 @@ (let [url-relative-path (:completionUrlRelativePath provider-config) think-tag-start (:thinkTagStart provider-config) think-tag-end (:thinkTagEnd provider-config) + keep-history-reasoning (:keepHistoryReasoning model-config) http-client (:httpClient provider-config)] (handler {:model real-model @@ -221,6 +222,7 @@ :url-relative-path url-relative-path :think-tag-start think-tag-start :think-tag-end think-tag-end + :keep-history-reasoning keep-history-reasoning :http-client http-client :api-url api-url :api-key api-key} diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj index a6cfb164..18b1ce82 100644 --- a/src/eca/llm_providers/openai_chat.clj +++ b/src/eca/llm_providers/openai_chat.clj @@ -384,19 +384,24 @@ (reset! reasoning-state* {:id nil :type nil :content "" :buffer ""}))) (defn ^:private prune-history - "Ensure DeepSeek-style reasoning_content is discarded from history but kept for the active turn. - Only drops 'reason' messages WITH :delta-reasoning? 
before the last user message. - Think-tag based reasoning (without :delta-reasoning?) is preserved and transformed to assistant messages." - [messages] - (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] - (->> messages - (keep-indexed (fn [i m] - (when-not (and (= "reason" (:role m)) - (get-in m [:content :delta-reasoning?]) - (< i last-user-idx)) - m))) - vec) - messages)) + "Discard reasoning messages from history. + Reasoning with :delta-reasoning? is kept for the current turn (as required by DeepSeek). + This is the default behavior; it can be changed via model-level configuration (keepHistoryReasoning). + Parameters: + - messages: the conversation history + - keep-history-reasoning: if true, preserve all reasoning in history" + [messages keep-history-reasoning] + (if keep-history-reasoning + messages + (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] + (->> messages + (keep-indexed (fn [i m] + (when-not (and (= "reason" (:role m)) + (or (< i last-user-idx) + (not (get-in m [:content :delta-reasoning?])))) + m))) + vec) + messages))) (defn chat-completion! "Primary entry point for OpenAI chat completions with streaming support. @@ -406,14 +411,14 @@ Compatible with OpenRouter and other OpenAI-compatible providers." [{:keys [model user-messages instructions temperature api-key api-url url-relative-path past-messages tools extra-payload extra-headers supports-image? - think-tag-start think-tag-end http-client]} + think-tag-start think-tag-end keep-history-reasoning http-client]} {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}] (let [think-tag-start (or think-tag-start "") think-tag-end (or think-tag-end "") stream?
(boolean callbacks) system-messages (when instructions [{:role "system" :content instructions}]) ;; Pipeline: prune history -> normalize -> merge adjacent assistants -> filter - all-messages (prune-history (vec (concat past-messages user-messages))) + all-messages (prune-history (vec (concat past-messages user-messages)) keep-history-reasoning) messages (vec (concat system-messages (normalize-messages all-messages supports-image? think-tag-start think-tag-end))) @@ -473,7 +478,7 @@ tool-calls)) on-tools-called-wrapper (fn on-tools-called-wrapper [tools-to-call on-tools-called handle-response] (when-let [{:keys [new-messages]} (on-tools-called tools-to-call)] - (let [pruned-messages (prune-history new-messages) + (let [pruned-messages (prune-history new-messages keep-history-reasoning) new-messages-list (vec (concat system-messages (normalize-messages pruned-messages supports-image? think-tag-start think-tag-end))) diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj index 67c9edf8..eb1b642f 100644 --- a/test/eca/llm_providers/openai_chat_test.clj +++ b/test/eca/llm_providers/openai_chat_test.clj @@ -259,7 +259,7 @@ {:role "assistant" :reasoning_content "Thinking..."}]))))) (deftest prune-history-test - (testing "Drops reason messages WITH :delta-reasoning? before the last user message (DeepSeek)" + (testing "Drops all reason messages before the last user message by default" (is (match? [{:role "user" :content "Q1"} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} @@ -272,15 +272,14 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "r2" :delta-reasoning? true}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) - (testing "Preserves reason messages WITHOUT :delta-reasoning? (think-tag based)" + (testing "Drops reason messages without :delta-reasoning? regardless of position (think-tag based)" (is (match?
[{:role "user" :content "Q1"} - {:role "reason" :content {:text "thinking..."}} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} - {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] (#'llm-providers.openai-chat/prune-history [{:role "user" :content "Q1"} @@ -288,12 +287,30 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "more thinking..."}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) + + (testing "Preserves all reasoning when keep-history-reasoning is true (Bedrock)" + (is (match? + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + (#'llm-providers.openai-chat/prune-history + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + true)))) (testing "No user message leaves list unchanged" (let [msgs [{:role "assistant" :content "A"} {:role "reason" :content {:text "r"}}]] - (is (= msgs (#'llm-providers.openai-chat/prune-history msgs)))))) + (is (= msgs (#'llm-providers.openai-chat/prune-history msgs false)))))) (deftest valid-message-test (testing "Tool messages are always kept" From a62562da7ce265d2bc03a4a1179a36b73e633c3a Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Sat, 17 Jan 2026 08:03:58 +0100 Subject: [PATCH 2/2] fix tests and improve docs --- docs/models.md | 5 ++++- integration-test/integration/chat/github_copilot_test.clj | 2 +- integration-test/integration/chat/google_test.clj | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/models.md b/docs/models.md index 242fab44..de142768 100644 --- a/docs/models.md +++ b/docs/models.md @@ -122,8 +122,11 @@ Examples: This 
way both will use gpt-5 model but one will override the reasoning to be high instead of the default. === "History reasoning" + `keepHistoryReasoning` - Determines whether the model's internal reasoning chain is persisted in the conversation history for subsequent turns. - `keepHistoryReasoning` preserves reasoning in conversation history. Set for specific models: + - **Standard Behavior**: Most models expect reasoning blocks (e.g., `<think>` tags or `reasoning_content`) to be removed in subsequent requests to save tokens and avoid bias. + - **Usage**: Enable this for models that explicitly support "preserved thinking," or if you want to experiment with letting the model see its previous thought process (with XML-based reasoning). + - **Example**: See [GLM-4.7 with Preserved thinking](https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking). ```javascript title="~/.config/eca/config.json" { diff --git a/integration-test/integration/chat/github_copilot_test.clj b/integration-test/integration/chat/github_copilot_test.clj index f8ba132c..a85d26f5 100644 --- a/integration-test/integration/chat/github_copilot_test.clj +++ b/integration-test/integration/chat/github_copilot_test.clj @@ -168,7 +168,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match?
{:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/integration-test/integration/chat/google_test.clj b/integration-test/integration/chat/google_test.clj index 5f34d341..30222616 100644 --- a/integration-test/integration/chat/google_test.clj +++ b/integration-test/integration/chat/google_test.clj @@ -167,7 +167,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? {:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1)))))))
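For reviewers, the pruning rule this patch introduces can be restated: with `keep-history-reasoning` false (the default), a `reason` message survives only when it carries `:delta-reasoning?` and appears after the last user message; with it true, everything is kept. A minimal Python sketch of that rule follows — an illustration of the intended behavior only, not the project's Clojure implementation (names like `prune_history` and the `delta_reasoning` key are stand-ins for the Clojure equivalents):

```python
def prune_history(messages, keep_history_reasoning=False):
    """Sketch of the pruning rule: drop 'reason' messages from history
    unless preservation is enabled; delta-reasoning messages belonging to
    the current turn (after the last user message) are kept."""
    if keep_history_reasoning:
        return messages
    user_idxs = [i for i, m in enumerate(messages) if m["role"] == "user"]
    if not user_idxs:
        # No user message: leave the list unchanged, as in the Clojure code.
        return messages
    last_user = user_idxs[-1]
    return [
        m for i, m in enumerate(messages)
        if m["role"] != "reason"
        or (i > last_user and m.get("delta_reasoning", False))
    ]
```

The list comprehension mirrors the `keep-indexed`/`when-not` condition in `prune-history`: a `reason` message is dropped when it is before the last user message or lacks the delta-reasoning flag.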