From afb16199e6745be4cade02316527aeba171cfa80 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Thu, 5 Mar 2026 16:21:21 +0100
Subject: [PATCH 1/2] Parse GPT-OSS built-in tool calls and return them as tool_calls

---
 src/llm/apis/openai_completions.cpp          | 23 +++++-
 src/llm/apis/openai_json_response.hpp        | 12 +++
 src/llm/apis/openai_request.hpp              |  2 +
 src/llm/io_processing/base_output_parser.hpp |  2 +
 src/llm/io_processing/gptoss/harmony.cpp     | 87 ++++++++++++++++++++
 src/llm/io_processing/gptoss/harmony.hpp     |  6 +-
 src/llm/io_processing/gptoss/tool_parser.cpp |  4 +
 7 files changed, 129 insertions(+), 7 deletions(-)
diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
index 0402017564..dad431395d 100644
--- a/src/llm/apis/openai_completions.cpp
+++ b/src/llm/apis/openai_completions.cpp
@@ -483,6 +483,25 @@ absl::Status OpenAIChatCompletionsHandler::parseTools() {
     }
 
     request.toolChoice = tool_choice;
+
+    it = doc.FindMember("chat_template_kwargs");
+    // If chat_template_kwargs is present, we need to check for builtin_tools list
+    if (it != doc.MemberEnd() && !it->value.IsNull()) {
+        if (!it->value.IsObject())
+            return absl::InvalidArgumentError("chat_template_kwargs is not a JSON object");
+        auto builtinToolsIt = it->value.GetObject().FindMember("builtin_tools");
+        if (builtinToolsIt != it->value.GetObject().MemberEnd() && !builtinToolsIt->value.IsNull()) {
+            if (!builtinToolsIt->value.IsArray())
+                return absl::InvalidArgumentError("builtin_tools is not an array");
+            for (size_t i = 0; i < builtinToolsIt->value.GetArray().Size(); i++) {
+                auto& toolNameValue = builtinToolsIt->value.GetArray()[i];
+                if (!toolNameValue.IsString())
+                    return absl::InvalidArgumentError("Each builtin_tool name must be a string");
+                request.allowedBuiltInTools.push_back(toolNameValue.GetString());
+            }
+        }
+    }
+
     if (jsonChanged) {
         StringBuffer buffer;
         Writer<StringBuffer> writer(buffer);
@@ -538,7 +557,7 @@ absl::StatusOr<std::optional<ov::genai::JsonContainer>> OpenAIChatCompletionsHan
 }
 
 const bool OpenAIChatCompletionsHandler::areToolsAvailable() const {
-    return !request.toolNameSchemaMap.empty();
+    return !request.toolNameSchemaMap.empty() || !request.allowedBuiltInTools.empty();
 }
 
 const OpenAIChatCompletionsRequest& OpenAIChatCompletionsHandler::getRequest() const {
@@ -1007,7 +1026,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect
         // "length" => due to reaching max_tokens parameter
         // "tool_calls" => generation stopped due to generated tool calls
 
-        std::optional<std::string> finishReason = mapFinishReason(generationOutput.finish_reason, !parsedOutput.toolCalls.empty());
+        std::optional<std::string> finishReason = mapFinishReason(generationOutput.finish_reason, !parsedOutput.toolCalls.empty() || !parsedOutput.builtInToolCalls.empty());
         if (!finishReason.has_value()) {
             SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Unknown finish reason: {}", static_cast<int>(generationOutput.finish_reason));
         }
diff --git a/src/llm/apis/openai_json_response.hpp b/src/llm/apis/openai_json_response.hpp
index 30eed440f6..8d1bf647f4 100644
--- a/src/llm/apis/openai_json_response.hpp
+++ b/src/llm/apis/openai_json_response.hpp
@@ -169,6 +169,18 @@ class OpenAiJsonResponse : public Writer<StringBuffer> {
 
             Writer<StringBuffer>::EndObject();
         }
+        for (const ToolCall& toolCall : parsedOutput.builtInToolCalls) {
+            StartObject();
+            String("id", toolCall.id);
+            String("type", "function");
+
+            StartObject("function");
+            String("name", toolCall.name);
+            String("arguments", toolCall.arguments);
+            Writer<StringBuffer>::EndObject();
+
+            Writer<StringBuffer>::EndObject();
+        }
         Writer<StringBuffer>::EndArray();
         Writer<StringBuffer>::EndObject();
         return true;
diff --git a/src/llm/apis/openai_request.hpp b/src/llm/apis/openai_request.hpp
index de355c12a1..409b5a455f 100644
--- a/src/llm/apis/openai_request.hpp
+++ b/src/llm/apis/openai_request.hpp
@@ -81,6 +81,8 @@ struct OpenAIChatCompletionsRequest {
     // Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice
     std::string toolChoice;
 
+    std::vector<std::string> allowedBuiltInTools;
+
     OpenAIChatCompletionsRequest() = default;
     ~OpenAIChatCompletionsRequest() = default;
 };
diff --git a/src/llm/io_processing/base_output_parser.hpp b/src/llm/io_processing/base_output_parser.hpp
index bc5d88ba4a..62cc6b68e9 100644
--- a/src/llm/io_processing/base_output_parser.hpp
+++ b/src/llm/io_processing/base_output_parser.hpp
@@ -43,6 +43,8 @@ struct ParsedOutput {
     std::string content;
     // Tool calls extracted from the response
     ToolCalls_t toolCalls;
+    // Tool calls extracted from the response that are identified as built-in (e.g., Python code execution, browser search, etc.) based on channel naming convention
+    ToolCalls_t builtInToolCalls;
     // Decoded reasoning from the response
     std::string reasoning;
 };
diff --git a/src/llm/io_processing/gptoss/harmony.cpp b/src/llm/io_processing/gptoss/harmony.cpp
index 76f8ee9191..fd7f734048 100644
--- a/src/llm/io_processing/gptoss/harmony.cpp
+++ b/src/llm/io_processing/gptoss/harmony.cpp
@@ -26,6 +26,36 @@
 namespace ovms {
 namespace openai {
 
+// Serializes input as a double-quoted JSON string literal (RFC 8259, section 7).
+// The quote, the backslash and every control character below 0x20 are escaped; all other
+// bytes (including UTF-8 multi-byte sequences) are copied through unchanged.
+static std::string escapeJsonString(const std::string& input) {
+    static const char hexDigits[] = "0123456789abcdef";
+    std::string output;
+    output.reserve(input.size() + 16);  // Reserve some extra space for escapes
+    output += '"';
+    for (const char c : input) {
+        if (c == '"' || c == '\\') {
+            output += '\\';
+            output += c;
+        } else if (c == '\n') {
+            output += "\\n";
+        } else if (c == '\r') {
+            output += "\\r";
+        } else if (c == '\t') {
+            output += "\\t";
+        } else if (static_cast<unsigned char>(c) < 0x20) {
+            output += "\\u00";  // remaining control characters are not allowed raw in JSON strings
+            output += hexDigits[c >> 4];
+            output += hexDigits[c & 0x0F];
+        } else {
+            output += c;
+        }
+    }
+    output += '"';
+    return output;
+}
+
 Harmony::Harmony(ov::genai::Tokenizer& tokenizer, const std::vector<int64_t>& tokens) :
     tokenizer(tokenizer),
     tokens(tokens) {}
@@ -121,6 +151,63 @@ ToolCalls_t Harmony::getToolCalls() {
     return toolCalls;
 }
 
+/*
+    Built-in tool calls are extracted from messages whose channel starts with "analysis" or "commentary".
+    Explicit recipient - the channel contains "to=NAME"; NAME ends at the first whitespace or '<'. Example:
+    <|channel|>analysis to=browser.search code<|message|>{"query": "latest developments AI technology 2025", "topn": 10, "source": "news"}<|call|>
+    A leading "functions." is stripped from NAME ("functions.python" -> "python"); arguments are the raw message content.
+    Implicit Python call - the channel is exactly "analysis code", "commentary code", "analysis json" or "commentary json";
+    the tool name is "python" and the message content is wrapped as {"code": "<escaped content>"}.
+    NOTE(review): "commentary to=functions.NAME" messages match here as well - confirm they are not also returned
+    by getToolCalls(). The request's allowed builtin_tools list is not consulted by this function.
+*/
+ToolCalls_t Harmony::getBuiltInToolCalls() {
+    static const std::string recipientMarker = "to=";
+    static const std::string functionsNamespace = "functions.";
+    ToolCalls_t builtInCalls;
+    for (const auto& msg : messages) {
+        const auto& channel = msg.getChannel();
+        if (!startsWith(channel, "analysis") && !startsWith(channel, "commentary")) {
+            continue;
+        }
+
+        const size_t markerPos = channel.find(recipientMarker);
+        if (markerPos == std::string::npos) {
+            // No explicit recipient: only the four code/json channels are treated as a Python call.
+            const bool isImplicitPython = channel == "analysis code" || channel == "commentary code" ||
+                                          channel == "analysis json" || channel == "commentary json";
+            if (!isImplicitPython) {
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping message. Could not find tool name in channel [{}]", channel);
+                continue;
+            }
+            // Per the original author's note, this is seen when the model emits
+            // <|channel|>commentary to=functions.python<|channel|>commentary json<|message|>... and the first channel (with "to=") is lost.
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found implicit Python tool call in channel [{}]", channel);
+            ToolCall implicitCall;
+            implicitCall.name = "python";
+            implicitCall.arguments = "{\"code\": " + escapeJsonString(msg.getContent()) + "}";
+            implicitCall.id = generateRandomId();
+            builtInCalls.push_back(std::move(implicitCall));
+            continue;
+        }
+
+        // Recipient name runs from just after "to=" up to the first whitespace or special-token opener.
+        const size_t nameBegin = markerPos + recipientMarker.length();
+        const size_t nameEnd = channel.find_first_of(" \t\n\r<", nameBegin);
+        const std::string recipient(nameEnd == std::string::npos
+                                        ? channel.substr(nameBegin)
+                                        : channel.substr(nameBegin, nameEnd - nameBegin));
+
+        ToolCall explicitCall;
+        // Strip "functions." prefix if present (e.g., "functions.python" -> "python")
+        explicitCall.name = startsWith(recipient, functionsNamespace) ? recipient.substr(functionsNamespace.length()) : recipient;
+        explicitCall.arguments = msg.getContent();
+        explicitCall.id = generateRandomId();
+        builtInCalls.push_back(std::move(explicitCall));
+    }
+    return builtInCalls;
+}
+
 bool Harmony::parse() {
     if (tokens.empty())
         return true;
diff --git a/src/llm/io_processing/gptoss/harmony.hpp b/src/llm/io_processing/gptoss/harmony.hpp
index 636c999a46..b898b6d7cb 100644
--- a/src/llm/io_processing/gptoss/harmony.hpp
+++ b/src/llm/io_processing/gptoss/harmony.hpp
@@ -46,11 +46,6 @@ class Harmony {
     // Valid messages parsed from unary output, used in final accessors
     std::vector<Message> messages;
 
-    // Intermediate state during parsing of each message, not used in final accessors
-    std::string content;
-    std::string reasoning;
-    ToolCalls_t toolCalls;
-
 public:
     Harmony(ov::genai::Tokenizer& tokenizer, const std::vector<int64_t>& tokens);
 
@@ -59,6 +54,7 @@ class Harmony {
     std::string getContent();
     std::string getReasoning();
     ToolCalls_t getToolCalls();
+    ToolCalls_t getBuiltInToolCalls();
 
     static const std::string TOKEN_START;
     static const std::string TOKEN_END;
diff --git a/src/llm/io_processing/gptoss/tool_parser.cpp b/src/llm/io_processing/gptoss/tool_parser.cpp
index d838c44a39..79397e8532 100644
--- a/src/llm/io_processing/gptoss/tool_parser.cpp
+++ b/src/llm/io_processing/gptoss/tool_parser.cpp
@@ -40,9 +40,13 @@ void GptOssToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64
     // This is because we have no guarantee that user will use both parsers, they might use only one of them.
     parsedOutput.content = harmony.getContent();
     parsedOutput.toolCalls = harmony.getToolCalls();
+    parsedOutput.builtInToolCalls = harmony.getBuiltInToolCalls();
     for (const auto& toolCall : parsedOutput.toolCalls) {
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Unary | GPT Tool | id: [{}], name: [{}], arguments: [{}]", toolCall.id, toolCall.name, toolCall.arguments);
     }
+    for (const auto& builtInToolCall : parsedOutput.builtInToolCalls) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Unary | GPT Built-in Tool | id: [{}], name: [{}], arguments: [{}]", builtInToolCall.id, builtInToolCall.name, builtInToolCall.arguments);
+    }
 }
 
 /*

From 7aa572c8097f35013b950d6dde5df653547dada6 Mon Sep 17 00:00:00 2001
From: Damian Kalinowski <damian.kalinowski@intel.com>
Date: Thu, 5 Mar 2026 17:20:41 +0100
Subject: [PATCH 2/2] Add built-in tools execution demo for GPT-OSS

---
 demos/builtin_tools_execution/README.md       | 257 ++++++++++++++++++
 demos/builtin_tools_execution/client.py       | 212 +++++++++++++++
 .../builtin_tools_execution/requirements.txt  |   2 +
 3 files changed, 471 insertions(+)
 create mode 100755 demos/builtin_tools_execution/README.md
 create mode 100755 demos/builtin_tools_execution/client.py
 create mode 100755 demos/builtin_tools_execution/requirements.txt

diff --git a/demos/builtin_tools_execution/README.md b/demos/builtin_tools_execution/README.md
new file mode 100755
index 0000000000..fde72749a6
--- /dev/null
+++ b/demos/builtin_tools_execution/README.md
@@ -0,0 +1,257 @@
+# Built-in Tools Execution with GPT-OSS {#ovms_demos_builtin_tools_execution}
+
+This demo shows how to use **built-in tools** with the [GPT-OSS](https://github.com/openai/gpt-oss) model served by OpenVINO Model Server.
+
+GPT-OSS natively supports a `python` built-in tool. When the model decides it needs to execute Python code (e.g. for calculations), it emits a `tool_call`. A client-side loop catches that call, forwards the code to an **MCP server** for sandboxed execution, and sends the result back to the model so it can produce a final answer.
+
+The diagram below depicts the demo setup:
+```
+┌────────────┐       ┌──────────────┐       ┌──────────────┐
+│  Client     │──1──▶│  OVMS        │       │  MCP Server  │
+│  (Python)   │◀──2──│  (GPT-OSS)   │       │  (Python     │
+│             │      │              │       │   executor)  │
+│             │      │              │       │              │
+│             │──4──▶│              │       │              │
+│             │◀──5──│              │       │              │
+└─────┬───▲──┘       └──────────────┘       └──────▲───────┘
+      │   │                                        │
+      └───┼────────────────3a───────────────────────┘
+          └────────────────3b───────────────────────┘
+```
+1. Client sends chat request with `builtin_tools: ["python"]`
+2. Model returns a `tool_call` for `python` with generated code
+3. Client forwards the code to the MCP server (3a) and receives the result (3b)
+4. Client sends tool result back to the model
+5. Model produces the final answer
+
+> **Note:** This demo was tested with GPT-OSS-20b on Intel® Arc™ GPU and Intel® Data Center GPU Series on Ubuntu 22/24.
+
+## Prerequisites
+
+- **Docker Engine** with GPU support (`--device /dev/dri`)
+- **Python 3.10+** with pip
+
+
+## Step 1: Export the GPT-OSS Model
+
+GPT-OSS has built-in tool support. Export the model to OpenVINO IR format using the `export_model.py` script:
+
+```console
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py
+pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt
+mkdir models
+```
+
+Run the export:
+```console
+python export_model.py text_generation \
+    --source_model openai/gpt-oss-20b \
+    --weight-format int4 \
+    --target_device GPU \
+    --config_file_path models/config.json \
+    --model_repository_path models \
+    --tool_parser gptoss \
+    --reasoning_parser gptoss
+```
+
+Download the GPT-OSS chat template:
+```console
+curl -L -o models/openai/gpt-oss-20b/chat_template.jinja \
+    https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/extras/chat_template_examples/chat_template_gpt_oss.jinja
+```
+
+You should have a model folder like below:
+```
+models
+├── config.json
+└── openai
+    └── gpt-oss-20b
+        ├── chat_template.jinja
+        ├── config.json
+        ├── generation_config.json
+        ├── graph.pbtxt
+        ├── openvino_detokenizer.bin
+        ├── openvino_detokenizer.xml
+        ├── openvino_model.bin
+        ├── openvino_model.xml
+        ├── openvino_tokenizer.bin
+        ├── openvino_tokenizer.xml
+        ├── special_tokens_map.json
+        ├── tokenizer_config.json
+        └── tokenizer.json
+```
+
+## Step 2: Start OpenVINO Model Server
+
+Deploy OVMS with the GPU image:
+
+```bash
+docker run -d --rm --name ovms-gptoss \
+    -p 8000:8000 \
+    -v $(pwd)/models:/models:ro \
+    --device /dev/dri \
+    --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) \
+    openvino/model_server:latest-gpu \
+    --rest_port 8000 \
+    --config_path /models/config.json
+```
+
+Wait for the model to load and verify readiness:
+```console
+curl http://localhost:8000/v1/config
+```
+
+:::{dropdown} Expected Response
+```json
+{
+    "openai/gpt-oss-20b": {
+        "model_version_status": [
+            {
+                "version": "1",
+                "state": "AVAILABLE",
+                "status": {
+                    "error_code": "OK",
+                    "error_message": "OK"
+                }
+            }
+        ]
+    }
+}
+```
+:::
+
+## Step 3: Set Up the MCP Python Executor
+
+The GPT-OSS repository includes a reference MCP server that executes Python code via a Jupyter kernel. Clone the repository and set up the MCP server:
+
+```console
+git clone https://github.com/openai/gpt-oss.git
+cd gpt-oss
+```
+
+Install the gpt-oss package and MCP server dependencies:
+```console
+pip install .
+cd gpt-oss-mcp-server
+pip install "mcp[cli]>=1.12.2" jupyter_client ipykernel
+```
+
+The MCP server uses the `dangerously_use_local_jupyter` backend which runs Python code through a local Jupyter kernel instead of Docker containers. To set the listening address and port, patch the `FastMCP` constructor in `python_server.py`:
+```console
+sed -i 's/mcp = FastMCP(/mcp = FastMCP(port=8080, host="0.0.0.0",/' python_server.py
+```
+
+Set the environment variable and start the MCP server:
+```console
+PYTHON_EXECUTION_BACKEND=dangerously_use_local_jupyter mcp run -t sse python_server.py:mcp
+```
+
+> **Note:** The MCP server must remain running in the foreground. Open a new terminal for the next steps.
+
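+> **Note:** `dangerously_use_local_jupyter` runs model-generated code through a local Jupyter kernel directly on the host, and `host="0.0.0.0"` in the patch above makes the server reachable from other machines; use `host="127.0.0.1"` if only local clients need access. For production use, consider the Docker-based backend with `PYTHON_EXECUTION_BACKEND=docker` and `docker pull python:3.11`.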
+
+
+## Step 4: Run the Client
+
+Install the client dependencies:
+```console
+pip install openai mcp
+```
+
+Download and run the client script:
+```console
+curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/builtin_tools_execution/client.py -o client.py
+python client.py --question "Which day of the week will be for 31 January of 3811? Use python for that."
+```
+
+You can pass any question via `--question`. The script follows the flow from the diagram, printing each step:
+
+:::{dropdown} Expected Output
+```
+==============================================================================
+  Built-in Tools Execution Demo (GPT-OSS + MCP Python Executor)
+==============================================================================
+
+Model:      openai/gpt-oss-20b
+OVMS URL:   http://localhost:8000/v3
+MCP URL:    http://127.0.0.1:8080/sse
+
+── Step 1: Sending chat request to OVMS with builtin_tools=["python"] ────────
+Question: Which day of the week will be for 31 January of 3811? Use python for that.
+
+── Step 2: Model returned a tool_call for "python" ──────────────────────────
+Finish reason: tool_calls
+Generated code:
+    import datetime
+    print(datetime.date(3811, 1, 31).strftime('%A'))
+
+── Step 3: Forwarding code to MCP server for execution ──────────────────────
+MCP server: http://127.0.0.1:8080/sse
+Execution result: Friday
+
+── Step 4: Sending tool result back to OVMS ──────────────────────────────────
+
+── Step 5: Model produced the final answer ───────────────────────────────────
+Content: January 31, 3811 will be a **Friday**.
+Finish reason: stop
+Usage: 113 prompt / 14 completion / 127 total tokens
+```
+:::
+
+### Configuration
+
+The script can be configured via command-line arguments or, where listed, environment variables:
+
+| Argument / Variable | Default | Description |
+|---------------------|---------|-------------|
+| `--question` | *"Which day of the week will be for 31 January of 3811? Use python for that."* | Question to send to the model |
+| `--base-url` / `OPENAI_BASE_URL` | `http://localhost:8000/v3` | OVMS REST API base URL |
+| `--mcp-server-url` / `MCP_SERVER_URL` | `http://127.0.0.1:8080/sse` | MCP server SSE endpoint |
+| `--model` / `OVMS_MODEL` | `openai/gpt-oss-20b` | Model name to use |
+
+Example with custom configuration:
+```console
+python client.py --question "What is the 50th prime number?" --base-url http://my-server:8000/v3
+```
+
+## How It Works
+
+### Built-in Tools in GPT-OSS
+
+GPT-OSS was trained with native support for a `python` built-in tool. To activate it, pass `builtin_tools: ["python"]` in the `chat_template_kwargs` parameter of the request:
+
+```python
+response = client.chat.completions.create(
+    model="openai/gpt-oss-20b",
+    messages=[{"role": "user", "content": "What is 2**100?"}],
+    extra_body={"chat_template_kwargs": {"builtin_tools": ["python"]}},
+)
+```
+
+When the model decides code execution is needed, it returns a response with `finish_reason: "tool_calls"` and a `tool_calls` array containing the generated Python code.
+
+### Client-Side Tool Execution
+
+Unlike standard function calling where tools are defined in the request, built-in tools are part of the model's training. The client is responsible for:
+1. Detecting `tool_calls` in the response
+2. Executing the code in a sandboxed environment (via the MCP server)
+3. Sending the result back as a `tool` message
+
+This pattern gives the client full control over the execution environment and security boundaries.
+
+### MCP Server
+
+The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server provides a standardized interface for tool execution. The gpt-oss reference implementation supports multiple execution backends:
+- **`dangerously_use_local_jupyter`** (used in this demo) — runs code through a local Jupyter kernel. Quick to set up, suitable for development and demos.
+- **`docker`** — runs code in isolated Docker containers for sandboxed execution. Recommended for production use.
+
+The backend is selected via the `PYTHON_EXECUTION_BACKEND` environment variable.
+
+## References
+
+- [GPT-OSS repository](https://github.com/openai/gpt-oss)
+- [GPT-OSS model on HuggingFace](https://huggingface.co/openai/gpt-oss-20b)
+- [LLM quick start guide](../../docs/llm/quickstart.md)
+- [Agentic AI demo](../continuous_batching/agentic_ai/README.md)
+- [Chat completions API reference](../../docs/model_server_rest_api_chat.md)
+- [Model Context Protocol](https://modelcontextprotocol.io/)
diff --git a/demos/builtin_tools_execution/client.py b/demos/builtin_tools_execution/client.py
new file mode 100755
index 0000000000..96526830f1
--- /dev/null
+++ b/demos/builtin_tools_execution/client.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Client script demonstrating built-in Python tool execution with GPT-OSS
+served via OpenVINO Model Server.
+
+When the model returns a python tool_call, the code is forwarded to an MCP
+server (python executor) for execution, and the result is sent back to the
+model so it can produce a final answer.
+
+Requirements:
+    pip install openai mcp
+"""
+
+import argparse
+import asyncio
+import json
+import os
+
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+from openai import OpenAI
+
+BANNER_WIDTH = 78
+
+
+def step_header(step: int, text: str) -> None:
+    """Print a blank line, then '── Step <step>: <text> ' padded with '─' up to BANNER_WIDTH columns."""
+    label = "\u2500\u2500 " + "Step {}: {} ".format(step, text)
+    print("\n" + label.ljust(BANNER_WIDTH, "\u2500"))
+
+
+def format_usage(usage) -> str:
+    """Return 'P prompt / C completion / T total tokens' for a usage object, or '' when usage is None."""
+    if usage is None:
+        return ""
+    counts = (usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)
+    return "{} prompt / {} completion / {} total tokens".format(*counts)
+
+
+# ── MCP python executor ─────────────────────────────────────────────────────
+async def run_python_via_mcp(code: str, mcp_url: str) -> str:
+    """Call the 'python' tool of the MCP server at mcp_url over SSE and return its text output.
+
+    Text-bearing items of the result are joined with newlines; '(no output)' is returned when there are none.
+    """
+    async with sse_client(url=mcp_url) as (reader, writer):
+        async with ClientSession(reader, writer) as session:
+            await session.initialize()
+            outcome = await session.call_tool("python", arguments={"code": code})
+            texts = [chunk.text for chunk in outcome.content if hasattr(chunk, "text")]
+            return "\n".join(texts) if texts else "(no output)"
+
+
+def execute_python(code: str, mcp_url: str) -> str:
+    """Run run_python_via_mcp(code, mcp_url) to completion with asyncio.run() and return its result."""
+    return asyncio.run(run_python_via_mcp(code, mcp_url))
+
+
+# ── Main flow ────────────────────────────────────────────────────────────────
+def chat_with_python(question: str, *, base_url: str, model: str, mcp_url: str):
+    """
+    Execute the full built-in tool flow:
+      1. Send chat request with builtin_tools=["python"]
+      2. Model returns a tool_call with generated code
+      3. Forward code to MCP server for execution
+      4. Send tool result back to the model
+      5. Model produces the final answer
+
+    Only one tool round is performed. Returns the last chat completion response (the first
+    one when the model answers without a tool call). Tool calls other than "python" are
+    answered with an explanatory "tool" message so no call is left without a result.
+    """
+    client = OpenAI(base_url=base_url, api_key="unused")
+    messages = [{"role": "user", "content": question}]
+    extra_body = {"chat_template_kwargs": {"builtin_tools": ["python"]}}  # sent with both requests
+
+    # ── Step 1 ───────────────────────────────────────────────────────────────
+    step_header(1, 'Sending chat request to OVMS with builtin_tools=["python"]')
+    print(f"Question: {question}")
+
+    response = client.chat.completions.create(model=model, messages=messages, extra_body=extra_body)
+    message = response.choices[0].message
+
+    # If the model answered directly (no tool call), print and return
+    if not message.tool_calls:
+        step_header(5, "Model produced the final answer (no tool needed)")
+        print(f"Content: {message.content}")
+        print(f"Finish reason: {response.choices[0].finish_reason}")
+        print(f"Usage: {format_usage(response.usage)}")
+        return response
+
+    # ── Step 2 ───────────────────────────────────────────────────────────────
+    step_header(2, f'Model returned a tool_call for "{message.tool_calls[0].function.name}"')
+    print(f"Finish reason: {response.choices[0].finish_reason}")
+
+    if hasattr(message, "reasoning_content") and message.reasoning_content:
+        print(f"Reasoning: {message.reasoning_content}")
+
+    messages.append({
+        "role": "assistant",
+        "content": message.content or "",
+        "tool_calls": [tc.model_dump() for tc in message.tool_calls],
+    })
+
+    for tc in message.tool_calls:
+        tool_name = tc.function.name
+        if tool_name != "python":
+            # The assistant message above lists this call, so it still needs a reply.
+            messages.append({"role": "tool", "tool_call_id": tc.id, "name": tool_name,
+                             "content": f'Tool "{tool_name}" is not available in this demo.'})
+            continue
+
+        code = tc.function.arguments  # either JSON {"code": "..."} or raw Python source
+        try:
+            parsed = json.loads(code)
+            if isinstance(parsed, dict) and isinstance(parsed.get("code"), str):
+                code = parsed["code"]
+        except (json.JSONDecodeError, TypeError):
+            pass
+
+        print("Generated code:")
+        for line in code.splitlines():
+            print(f"    {line}")
+
+        # ── Step 3 ───────────────────────────────────────────────────────────
+        step_header(3, "Forwarding code to MCP server for execution")
+        print(f"MCP server: {mcp_url}")
+        tool_result = execute_python(code, mcp_url)
+        print(f"Execution result: {tool_result}")
+
+        messages.append({
+            "role": "tool",
+            "tool_call_id": tc.id,
+            "name": "python",
+            "content": tool_result,
+        })
+
+    # ── Step 4 ───────────────────────────────────────────────────────────────
+    step_header(4, "Sending tool result back to OVMS")
+    final_response = client.chat.completions.create(model=model, messages=messages, extra_body=extra_body)
+
+    # ── Step 5 ───────────────────────────────────────────────────────────────
+    final_message = final_response.choices[0].message
+    step_header(5, "Model produced the final answer")
+    print(f"Content: {final_message.content}")
+    print(f"Finish reason: {final_response.choices[0].finish_reason}")
+    print(f"Usage: {format_usage(final_response.usage)}")
+
+    return final_response
+
+
+def parse_args():
+    """Parse command-line options; URL and model defaults fall back to environment variables."""
+    options = [
+        (("--question", "-q"),
+         "Which day of the week will be for 31 January of 3811? Use python for that.",
+         "Question to send to the model"),
+        (("--base-url",),
+         os.getenv("OPENAI_BASE_URL", "http://localhost:8000/v3"),
+         "OVMS REST API base URL (default: http://localhost:8000/v3)"),
+        (("--mcp-server-url",),
+         os.getenv("MCP_SERVER_URL", "http://127.0.0.1:8080/sse"),
+         "MCP server SSE endpoint (default: http://127.0.0.1:8080/sse)"),
+        (("--model",),
+         os.getenv("OVMS_MODEL", "openai/gpt-oss-20b"),
+         "Model name (default: openai/gpt-oss-20b)"),
+    ]
+    parser = argparse.ArgumentParser(description="Built-in tools execution demo — GPT-OSS + MCP Python executor")
+    for flags, default, help_text in options:
+        parser.add_argument(*flags, default=default, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Print the banner and effective settings, then run the demo; errors are reported and re-raised."""
+    args = parse_args()
+    rule = "=" * BANNER_WIDTH
+    banner = [
+        rule,
+        "  Built-in Tools Execution Demo (GPT-OSS + MCP Python Executor)",
+        rule,
+        f"\nModel:      {args.model}",
+        f"OVMS URL:   {args.base_url}",
+        f"MCP URL:    {args.mcp_server_url}",
+    ]
+    print("\n".join(banner))
+
+    settings = {"base_url": args.base_url, "model": args.model, "mcp_url": args.mcp_server_url}
+    try:
+        chat_with_python(args.question, **settings)
+    except Exception as err:
+        print(f"\nError: {err}")
+        raise
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demos/builtin_tools_execution/requirements.txt b/demos/builtin_tools_execution/requirements.txt
new file mode 100755
index 0000000000..582dd5901e
--- /dev/null
+++ b/demos/builtin_tools_execution/requirements.txt
@@ -0,0 +1,2 @@
+openai
+mcp