From afb16199e6745be4cade02316527aeba171cfa80 Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Thu, 5 Mar 2026 16:21:21 +0100 Subject: [PATCH 1/2] v1 --- src/llm/apis/openai_completions.cpp | 23 +++++- src/llm/apis/openai_json_response.hpp | 12 +++ src/llm/apis/openai_request.hpp | 2 + src/llm/io_processing/base_output_parser.hpp | 2 + src/llm/io_processing/gptoss/harmony.cpp | 87 ++++++++++++++++++++ src/llm/io_processing/gptoss/harmony.hpp | 6 +- src/llm/io_processing/gptoss/tool_parser.cpp | 4 + 7 files changed, 129 insertions(+), 7 deletions(-) diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp index 0402017564..dad431395d 100644 --- a/src/llm/apis/openai_completions.cpp +++ b/src/llm/apis/openai_completions.cpp @@ -483,6 +483,25 @@ absl::Status OpenAIChatCompletionsHandler::parseTools() { } request.toolChoice = tool_choice; + + it = doc.FindMember("chat_template_kwargs"); + // If chat_template_kwargs is present, we need to check for builtin_tools list + if (it != doc.MemberEnd() && !it->value.IsNull()) { + if (!it->value.IsObject()) + return absl::InvalidArgumentError("chat_template_kwargs is not a JSON object"); + auto builtinToolsIt = it->value.GetObject().FindMember("builtin_tools"); + if (builtinToolsIt != it->value.GetObject().MemberEnd() && !builtinToolsIt->value.IsNull()) { + if (!builtinToolsIt->value.IsArray()) + return absl::InvalidArgumentError("builtin_tools is not an array"); + for (size_t i = 0; i < builtinToolsIt->value.GetArray().Size(); i++) { + auto& toolNameValue = builtinToolsIt->value.GetArray()[i]; + if (!toolNameValue.IsString()) + return absl::InvalidArgumentError("Each builtin_tool name must be a string"); + request.allowedBuiltInTools.push_back(toolNameValue.GetString()); + } + } + } + if (jsonChanged) { StringBuffer buffer; Writer writer(buffer); @@ -538,7 +557,7 @@ absl::StatusOr> OpenAIChatCompletionsHan } const bool OpenAIChatCompletionsHandler::areToolsAvailable() const { - return 
!request.toolNameSchemaMap.empty(); + return !request.toolNameSchemaMap.empty() || !request.allowedBuiltInTools.empty(); } const OpenAIChatCompletionsRequest& OpenAIChatCompletionsHandler::getRequest() const { @@ -1007,7 +1026,7 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(const std::vect // "length" => due to reaching max_tokens parameter // "tool_calls" => generation stopped due to generated tool calls - std::optional finishReason = mapFinishReason(generationOutput.finish_reason, !parsedOutput.toolCalls.empty()); + std::optional finishReason = mapFinishReason(generationOutput.finish_reason, !parsedOutput.toolCalls.empty() || !parsedOutput.builtInToolCalls.empty()); if (!finishReason.has_value()) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Unknown finish reason: {}", static_cast(generationOutput.finish_reason)); } diff --git a/src/llm/apis/openai_json_response.hpp b/src/llm/apis/openai_json_response.hpp index 30eed440f6..8d1bf647f4 100644 --- a/src/llm/apis/openai_json_response.hpp +++ b/src/llm/apis/openai_json_response.hpp @@ -169,6 +169,18 @@ class OpenAiJsonResponse : public Writer { Writer::EndObject(); } + for (const ToolCall& toolCall : parsedOutput.builtInToolCalls) { + StartObject(); + String("id", toolCall.id); + String("type", "function"); + + StartObject("function"); + String("name", toolCall.name); + String("arguments", toolCall.arguments); + Writer::EndObject(); + + Writer::EndObject(); + } Writer::EndArray(); Writer::EndObject(); return true; diff --git a/src/llm/apis/openai_request.hpp b/src/llm/apis/openai_request.hpp index de355c12a1..409b5a455f 100644 --- a/src/llm/apis/openai_request.hpp +++ b/src/llm/apis/openai_request.hpp @@ -81,6 +81,8 @@ struct OpenAIChatCompletionsRequest { // Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice std::string toolChoice; + std::vector allowedBuiltInTools; + OpenAIChatCompletionsRequest() = default; 
// Serializes `input` as a JSON string literal, including the surrounding double quotes.
//
// Escaping follows RFC 8259 section 7:
//  - '"' and '\\' are backslash-escaped,
//  - backspace, form feed, newline, carriage return and tab use their short escapes,
//  - every other control byte below 0x20 is written as \u00XX, because a raw control
//    character inside a JSON string makes the whole document invalid.
// Bytes >= 0x20 (including multi-byte UTF-8 sequences) are copied through unchanged.
static std::string escapeJsonString(const std::string& input) {
    static const char hexDigits[] = "0123456789abcdef";
    std::string output;
    output.reserve(input.size() + 16);  // Reserve some extra space for quotes and escapes
    output += '"';
    for (const char c : input) {
        switch (c) {
        case '"':
            output += "\\\"";
            break;
        case '\\':
            output += "\\\\";
            break;
        case '\b':
            output += "\\b";
            break;
        case '\f':
            output += "\\f";
            break;
        case '\n':
            output += "\\n";
            break;
        case '\r':
            output += "\\r";
            break;
        case '\t':
            output += "\\t";
            break;
        default: {
            // Compare as unsigned so that UTF-8 bytes (negative when char is signed)
            // are not mistaken for control characters.
            const unsigned char byte = static_cast<unsigned char>(c);
            if (byte < 0x20) {
                output += "\\u00";
                output += hexDigits[byte >> 4];
                output += hexDigits[byte & 0x0F];
            } else {
                output += c;
            }
            break;
        }
        }
    }
    output += '"';
    return output;
}
supports: + - "to=functions.python" format for Python code execution + - "analysis code" channel (implicit Python tool) when no explicit "to=" is present +*/ +ToolCalls_t Harmony::getBuiltInToolCalls() { + static const std::string tool_prefix = "to="; + static const std::string functions_prefix = "functions."; + ToolCalls_t toolCalls; + for (const auto& msg : messages) { + if (startsWith(msg.getChannel(), "analysis") || startsWith(msg.getChannel(), "commentary")) { + size_t marker = msg.getChannel().find(tool_prefix); + if (marker != std::string::npos) { + marker += tool_prefix.length(); + size_t firstWhiteSpaceOrSpecialBegin = msg.getChannel().find_first_of(" \t\n\r<", marker); + ToolCall toolCall; + std::string rawName; + if (firstWhiteSpaceOrSpecialBegin == std::string::npos) { + // Take the remaining part of the string + rawName = msg.getChannel().substr(marker); + } else { + // Take up to the first whitespace or special token begin + rawName = msg.getChannel().substr(marker, firstWhiteSpaceOrSpecialBegin - marker); + } + + // Strip "functions." prefix if present (e.g., "functions.python" -> "python") + if (startsWith(rawName, functions_prefix)) { + toolCall.name = rawName.substr(functions_prefix.length()); + } else { + toolCall.name = rawName; + } + + toolCall.arguments = msg.getContent(); + toolCall.id = generateRandomId(); + toolCalls.push_back(std::move(toolCall)); + } else if (msg.getChannel() == "analysis code" || msg.getChannel() == "commentary code" || + msg.getChannel() == "analysis json" || msg.getChannel() == "commentary json") { + // Implicit Python tool call - channel indicates code/json execution without explicit "to=" + // This happens when model outputs: <|channel|>commentary to=functions.python<|channel|>commentary json<|message|>... 
+ // The first channel with "to=" is lost, but "analysis/commentary code/json" indicates Python code execution + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found implicit Python tool call in channel [{}]", msg.getChannel()); + ToolCall toolCall; + toolCall.name = "python"; + toolCall.arguments = "{\"code\": " + escapeJsonString(msg.getContent()) + "}"; + toolCall.id = generateRandomId(); + toolCalls.push_back(std::move(toolCall)); + } else { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Skipping message. Could not find tool name in channel [{}]", msg.getChannel()); + } + } + } + return toolCalls; +} + bool Harmony::parse() { if (tokens.empty()) return true; diff --git a/src/llm/io_processing/gptoss/harmony.hpp b/src/llm/io_processing/gptoss/harmony.hpp index 636c999a46..b898b6d7cb 100644 --- a/src/llm/io_processing/gptoss/harmony.hpp +++ b/src/llm/io_processing/gptoss/harmony.hpp @@ -46,11 +46,6 @@ class Harmony { // Valid messages parsed from unary output, used in final accessors std::vector messages; - // Intermediate state during parsing of each message, not used in final accessors - std::string content; - std::string reasoning; - ToolCalls_t toolCalls; - public: Harmony(ov::genai::Tokenizer& tokenizer, const std::vector& tokens); @@ -59,6 +54,7 @@ class Harmony { std::string getContent(); std::string getReasoning(); ToolCalls_t getToolCalls(); + ToolCalls_t getBuiltInToolCalls(); static const std::string TOKEN_START; static const std::string TOKEN_END; diff --git a/src/llm/io_processing/gptoss/tool_parser.cpp b/src/llm/io_processing/gptoss/tool_parser.cpp index d838c44a39..79397e8532 100644 --- a/src/llm/io_processing/gptoss/tool_parser.cpp +++ b/src/llm/io_processing/gptoss/tool_parser.cpp @@ -40,9 +40,13 @@ void GptOssToolParser::parse(ParsedOutput& parsedOutput, const std::vector Date: Thu, 5 Mar 2026 17:20:41 +0100 Subject: [PATCH 2/2] save --- demos/builtin_tools_execution/README.md | 257 ++++++++++++++++++ demos/builtin_tools_execution/client.py | 212 
+++++++++++++++ .../builtin_tools_execution/requirements.txt | 2 + 3 files changed, 471 insertions(+) create mode 100755 demos/builtin_tools_execution/README.md create mode 100755 demos/builtin_tools_execution/client.py create mode 100755 demos/builtin_tools_execution/requirements.txt diff --git a/demos/builtin_tools_execution/README.md b/demos/builtin_tools_execution/README.md new file mode 100755 index 0000000000..fde72749a6 --- /dev/null +++ b/demos/builtin_tools_execution/README.md @@ -0,0 +1,257 @@ +# Built-in Tools Execution with GPT-OSS {#ovms_demos_builtin_tools_execution} + +This demo shows how to use **built-in tools** with the [GPT-OSS](https://github.com/openai/gpt-oss) model served by OpenVINO Model Server. + +GPT-OSS natively supports a `python` built-in tool. When the model decides it needs to execute Python code (e.g. for calculations), it emits a `tool_call`. A client-side loop catches that call, forwards the code to an **MCP server** for sandboxed execution, and sends the result back to the model so it can produce a final answer. + +The diagram below depicts the demo setup: +``` +┌────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Client │──1──▶│ OVMS │ │ MCP Server │ +│ (Python) │◀──2──│ (GPT-OSS) │ │ (Python │ +│ │──3──▶│ │ │ executor) │ +│ │ │ │ │ │ +│ │──4──▶│ │ │ │ +│ │◀──5──│ │ │ │ +└─────┬───▲──┘ └──────────────┘ └──────▲───────┘ + │ │ │ + └───┼────────────────3a───────────────────────┘ + └────────────────3b───────────────────────┘ +``` +1. Client sends chat request with `builtin_tools: ["python"]` +2. Model returns a `tool_call` for `python` with generated code +3. Client forwards the code to the MCP server (3a) and receives the result (3b) +4. Client sends tool result back to the model +5. Model produces the final answer + +> **Note:** This demo was tested with GPT-OSS-20b on Intel® Arc™ GPU and Intel® Data Center GPU Series on Ubuntu 22/24. 
+ +## Prerequisites + +- **Docker Engine** with GPU support (`--device /dev/dri`) +- **Python 3.10+** with pip + + +## Step 1: Export the GPT-OSS Model + +GPT-OSS has built-in tool support. Export the model to OpenVINO IR format using the `export_model.py` script: + +```console +curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/export_model.py -o export_model.py +pip3 install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt +mkdir models +``` + +Run the export: +```console +python export_model.py text_generation \ + --source_model openai/gpt-oss-20b \ + --weight-format int4 \ + --target_device GPU \ + --config_file_path models/config.json \ + --model_repository_path models \ + --tool_parser gptoss \ + --reasoning_parser gptoss +``` + +Download the GPT-OSS chat template: +```console +curl -L -o models/openai/gpt-oss-20b/chat_template.jinja \ + https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/extras/chat_template_examples/chat_template_gpt_oss.jinja +``` + +You should have a model folder like below: +``` +models +├── config.json +└── openai + └── gpt-oss-20b + ├── chat_template.jinja + ├── config.json + ├── generation_config.json + ├── graph.pbtxt + ├── openvino_detokenizer.bin + ├── openvino_detokenizer.xml + ├── openvino_model.bin + ├── openvino_model.xml + ├── openvino_tokenizer.bin + ├── openvino_tokenizer.xml + ├── special_tokens_map.json + ├── tokenizer_config.json + └── tokenizer.json +``` + +## Step 2: Start OpenVINO Model Server + +Deploy OVMS with the GPU image: + +```bash +docker run -d --rm --name ovms-gptoss \ + -p 8000:8000 \ + -v $(pwd)/models:/models:ro \ + --device /dev/dri \ + --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) \ + openvino/model_server:latest-gpu \ + --rest_port 8000 \ + --config_path /models/config.json +``` + +Wait for the model to load and verify readiness: 
+```console +curl http://localhost:8000/v1/config +``` + +:::{dropdown} Expected Response +```json +{ + "openai/gpt-oss-20b": { + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] + } +} +``` +::: + +## Step 3: Set Up the MCP Python Executor + +The GPT-OSS repository includes a reference MCP server that executes Python code via a Jupyter kernel. Clone the repository and set up the MCP server: + +```console +git clone https://github.com/openai/gpt-oss.git +cd gpt-oss +``` + +Install the gpt-oss package and MCP server dependencies: +```console +pip install . +cd gpt-oss-mcp-server +pip install "mcp[cli]>=1.12.2" jupyter_client ipykernel +``` + +The MCP server uses the `dangerously_use_local_jupyter` backend which runs Python code through a local Jupyter kernel instead of Docker containers. To configure the port, patch the `FastMCP` constructor in `python_server.py`: +```console +sed -i 's/mcp = FastMCP(/mcp = FastMCP(port=8080, host="0.0.0.0",/' python_server.py +``` + +Set the environment variable and start the MCP server: +```console +PYTHON_EXECUTION_BACKEND=dangerously_use_local_jupyter mcp run -t sse python_server.py:mcp +``` + +> **Note:** The MCP server must remain running in the foreground. Open a new terminal for the next steps. + +> **Note:** `dangerously_use_local_jupyter` runs code through a local Jupyter kernel. For production use, consider the Docker-based backend with `PYTHON_EXECUTION_BACKEND=docker` and `docker pull python:3.11`. + + +## Step 4: Run the Client + +Install the client dependencies: +```console +pip install openai mcp +``` + +Download and run the client script: +```console +curl https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/builtin_tools_execution/client.py -o client.py +python client.py --question "Which day of the week will be for 31 January of 3811? Use python for that." 
+``` + +You can pass any question via `--question`. The script follows the flow from the diagram, printing each step: + +:::{dropdown} Expected Output +``` +============================================================================== + Built-in Tools Execution Demo (GPT-OSS + MCP Python Executor) +============================================================================== + +Model: openai/gpt-oss-20b +OVMS URL: http://localhost:8000/v3 +MCP URL: http://127.0.0.1:8080/sse + +── Step 1: Sending chat request to OVMS with builtin_tools=["python"] ──────── +Question: Which day of the week will be for 31 January of 3811? Use python for that. + +── Step 2: Model returned a tool_call for "python" ────────────────────────── +Finish reason: tool_calls +Generated code: + import datetime + print(datetime.date(3811, 1, 31).strftime('%A')) + +── Step 3: Forwarding code to MCP server for execution ────────────────────── +MCP server: http://127.0.0.1:8080/sse +Execution result: Friday + +── Step 4: Sending tool result back to OVMS ────────────────────────────────── + +── Step 5: Model produced the final answer ─────────────────────────────────── +Content: January 31, 3811 will be a **Friday**. +Finish reason: stop +Usage: 113 prompt / 14 completion / 127 total tokens +``` +::: + +### Configuration + +The script can be configured via environment variables: + +| Argument / Variable | Default | Description | +|---------------------|---------|-------------| +| `--question` | *"Which day of the week will be for 31 January of 3811? Use python for that."* | Question to send to the model | +| `--base-url` / `OPENAI_BASE_URL` | `http://localhost:8000/v3` | OVMS REST API base URL | +| `--mcp-server-url` / `MCP_SERVER_URL` | `http://127.0.0.1:8080/sse` | MCP server SSE endpoint | +| `--model` / `OVMS_MODEL` | `openai/gpt-oss-20b` | Model name to use | + +Example with custom configuration: +```console +python client.py --question "What is the 50th prime number?" 
--base-url http://my-server:8000/v3 +``` + +## How It Works + +### Built-in Tools in GPT-OSS + +GPT-OSS was trained with native support for a `python` built-in tool. To activate it, pass `builtin_tools: ["python"]` in the `chat_template_kwargs` parameter of the request: + +```python +response = client.chat.completions.create( + model="openai/gpt-oss-20b", + messages=[{"role": "user", "content": "What is 2**100?"}], + extra_body={"chat_template_kwargs": {"builtin_tools": ["python"]}}, +) +``` + +When the model decides code execution is needed, it returns a response with `finish_reason: "tool_calls"` and a `tool_calls` array containing the generated Python code. + +### Client-Side Tool Execution + +Unlike standard function calling where tools are defined in the request, built-in tools are part of the model's training. The client is responsible for: +1. Detecting `tool_calls` in the response +2. Executing the code via the MCP server (sandboxed only when the `docker` backend is used; the `dangerously_use_local_jupyter` backend used in this demo runs it in a local Jupyter kernel) +3. Sending the result back as a `tool` message + +This pattern gives the client full control over the execution environment and security boundaries. + +### MCP Server + +The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server provides a standardized interface for tool execution. The gpt-oss reference implementation supports multiple execution backends: +- **`dangerously_use_local_jupyter`** (used in this demo) — runs code through a local Jupyter kernel. Quick to set up, suitable for development and demos. +- **`docker`** — runs code in isolated Docker containers for sandboxed execution. Recommended for production use. + +The backend is selected via the `PYTHON_EXECUTION_BACKEND` environment variable.
+ +## References + +- [GPT-OSS repository](https://github.com/openai/gpt-oss) +- [GPT-OSS model on HuggingFace](https://huggingface.co/openai/gpt-oss-20b) +- [LLM quick start guide](../../docs/llm/quickstart.md) +- [Agentic AI demo](../continuous_batching/agentic_ai/README.md) +- [Chat completions API reference](../../docs/model_server_rest_api_chat.md) +- [Model Context Protocol](https://modelcontextprotocol.io/) diff --git a/demos/builtin_tools_execution/client.py b/demos/builtin_tools_execution/client.py new file mode 100755 index 0000000000..96526830f1 --- /dev/null +++ b/demos/builtin_tools_execution/client.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Client script demonstrating built-in Python tool execution with GPT-OSS +served via OpenVINO Model Server. + +When the model returns a python tool_call, the code is forwarded to an MCP +server (python executor) for execution, and the result is sent back to the +model so it can produce a final answer. 
# Requirements:
#     pip install openai mcp

import argparse
import asyncio
import json
import os

from mcp import ClientSession
from mcp.client.sse import sse_client
from openai import OpenAI

BANNER_WIDTH = 78


def step_header(step: int, text: str) -> None:
    """Print a visually distinct step header matching the demo diagram.

    The header is padded with box-drawing dashes up to BANNER_WIDTH columns;
    headers longer than the banner are printed without padding.
    """
    tag = f"Step {step}: {text} "
    print(f"\n\u2500\u2500 {tag}" + "\u2500" * max(0, BANNER_WIDTH - len(tag) - 3))


def format_usage(usage) -> str:
    """Render the token usage of a response, or an empty string when it is missing."""
    if usage is None:
        return ""
    return (f"{usage.prompt_tokens} prompt / "
            f"{usage.completion_tokens} completion / "
            f"{usage.total_tokens} total tokens")


# MCP python executor
async def run_python_via_mcp(code: str, mcp_url: str) -> str:
    """Connect to the MCP SSE server and call the 'python' tool.

    Returns the text parts of the tool result joined with newlines, or
    "(no output)" when the result carries no text items.
    """
    async with sse_client(url=mcp_url) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            result = await session.call_tool("python", arguments={"code": code})
            parts = []
            for item in result.content:
                if hasattr(item, "text"):
                    parts.append(item.text)
            return "\n".join(parts) if parts else "(no output)"


def execute_python(code: str, mcp_url: str) -> str:
    """Synchronous wrapper around the async MCP call."""
    return asyncio.run(run_python_via_mcp(code, mcp_url))


def _extract_code(arguments: str) -> str:
    """Return the Python source carried by a tool call's ``arguments`` string.

    The arguments are either a JSON object with a "code" key or the raw code
    itself; anything that is not such an object is returned unchanged.
    """
    try:
        parsed = json.loads(arguments)
    except (json.JSONDecodeError, TypeError):
        return arguments
    if isinstance(parsed, dict) and "code" in parsed:
        return parsed["code"]
    return arguments


# Main flow
def chat_with_python(question: str, *, base_url: str, model: str, mcp_url: str):
    """
    Execute the full built-in tool flow:
      1. Send chat request with builtin_tools=["python"]
      2. Model returns a tool_call with generated code
      3. Forward code to MCP server for execution
      4. Send tool result back to the model
      5. Model produces the final answer

    Returns the last chat completion response: the first one when the model
    answered without calling a tool, otherwise the follow-up response.
    """
    client = OpenAI(base_url=base_url, api_key="unused")
    messages = [{"role": "user", "content": question}]

    # Step 1
    step_header(1, 'Sending chat request to OVMS with builtin_tools=["python"]')
    print(f"Question: {question}")

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        extra_body={"chat_template_kwargs": {"builtin_tools": ["python"]}},
    )
    message = response.choices[0].message

    # If the model answered directly (no tool call), print and return
    if not message.tool_calls:
        step_header(5, "Model produced the final answer (no tool needed)")
        print(f"Content: {message.content}")
        print(f"Finish reason: {response.choices[0].finish_reason}")
        print(f"Usage: {format_usage(response.usage)}")
        return response

    # Step 2
    step_header(2, f'Model returned a tool_call for "{message.tool_calls[0].function.name}"')
    print(f"Finish reason: {response.choices[0].finish_reason}")

    if hasattr(message, "reasoning_content") and message.reasoning_content:
        print(f"Reasoning: {message.reasoning_content}")

    messages.append({
        "role": "assistant",
        "content": message.content or "",
        "tool_calls": [tc.model_dump() for tc in message.tool_calls],
    })

    for tc in message.tool_calls:
        if tc.function.name != "python":
            # The assistant message above echoes every tool call, so each one
            # needs a matching tool result; otherwise the follow-up request
            # would contain a call that was never answered.
            print(f'Tool "{tc.function.name}" is not supported by this demo; reporting an error to the model')
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,
                "name": tc.function.name,
                "content": f"Error: tool '{tc.function.name}' is not available.",
            })
            continue

        code = _extract_code(tc.function.arguments)

        print("Generated code:")
        for line in code.splitlines():
            print(f" {line}")

        # Step 3
        step_header(3, "Forwarding code to MCP server for execution")
        print(f"MCP server: {mcp_url}")
        tool_result = execute_python(code, mcp_url)
        print(f"Execution result: {tool_result}")

        messages.append({
            "role": "tool",
            "tool_call_id": tc.id,
            "name": "python",
            "content": tool_result,
        })

    # Step 4
    step_header(4, "Sending tool result back to OVMS")

    final_response = client.chat.completions.create(
        model=model,
        messages=messages,
        extra_body={"chat_template_kwargs": {"builtin_tools": ["python"]}},
    )

    # Step 5
    final_message = final_response.choices[0].message
    step_header(5, "Model produced the final answer")
    print(f"Content: {final_message.content}")
    print(f"Finish reason: {final_response.choices[0].finish_reason}")
    print(f"Usage: {format_usage(final_response.usage)}")

    return final_response


def parse_args():
    """Parse command-line options; URL and model defaults can come from environment variables."""
    parser = argparse.ArgumentParser(
        description="Built-in tools execution demo — GPT-OSS + MCP Python executor")
    parser.add_argument(
        "--question", "-q",
        default="Which day of the week will be for 31 January of 3811? Use python for that.",
        help="Question to send to the model")
    parser.add_argument(
        "--base-url",
        default=os.getenv("OPENAI_BASE_URL", "http://localhost:8000/v3"),
        help="OVMS REST API base URL (default: http://localhost:8000/v3)")
    parser.add_argument(
        "--mcp-server-url",
        default=os.getenv("MCP_SERVER_URL", "http://127.0.0.1:8080/sse"),
        help="MCP server SSE endpoint (default: http://127.0.0.1:8080/sse)")
    parser.add_argument(
        "--model",
        default=os.getenv("OVMS_MODEL", "openai/gpt-oss-20b"),
        help="Model name (default: openai/gpt-oss-20b)")
    return parser.parse_args()


def main():
    """Print the banner and run the demo flow; errors are reported and then re-raised."""
    args = parse_args()

    print("=" * BANNER_WIDTH)
    print(" Built-in Tools Execution Demo (GPT-OSS + MCP Python Executor)")
    print("=" * BANNER_WIDTH)
    print(f"\nModel: {args.model}")
    print(f"OVMS URL: {args.base_url}")
    print(f"MCP URL: {args.mcp_server_url}")

    try:
        chat_with_python(
            args.question,
            base_url=args.base_url,
            model=args.model,
            mcp_url=args.mcp_server_url,
        )
    except Exception as e:
        print(f"\nError: {e}")
        raise


if __name__ == "__main__":
    main()