diff --git a/demos/c_api_minimal_app/Makefile b/demos/c_api_minimal_app/Makefile index 86e28febd5..8fd0473b75 100644 --- a/demos/c_api_minimal_app/Makefile +++ b/demos/c_api_minimal_app/Makefile @@ -25,13 +25,13 @@ BASE_OS ?= ubuntu24 ifeq ($(BASE_OS),ubuntu24) BASE_OS_TAG_UBUNTU ?= 24.04 - PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_ubuntu24_2026.1.0_python_off.tar.gz" + PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_ubuntu24_2026.2.0_python_off.tar.gz" BASE_IMAGE ?= ubuntu:$(BASE_OS_TAG_UBUNTU) DIST_OS=ubuntu endif ifeq ($(BASE_OS),redhat) BASE_OS_TAG_REDHAT ?= 9.6 - PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_redhat_2026.1.0_python_off.tar.gz" + PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_redhat_2026.2.0_python_off.tar.gz" BASE_IMAGE ?= registry.access.redhat.com/ubi9/ubi:$(BASE_OS_TAG_REDHAT) DIST_OS=redhat endif diff --git a/demos/continuous_batching/agentic_ai/.gitignore b/demos/continuous_batching/agentic_ai/.gitignore new file mode 100644 index 0000000000..38d1f85679 --- /dev/null +++ b/demos/continuous_batching/agentic_ai/.gitignore @@ -0,0 +1 @@ +mcp_weather_server diff --git a/demos/continuous_batching/agentic_ai/README.md b/demos/continuous_batching/agentic_ai/README.md index 8427d7d3b8..1b76928aeb 100644 --- a/demos/continuous_batching/agentic_ai/README.md +++ b/demos/continuous_batching/agentic_ai/README.md @@ -123,6 +123,28 @@ Exemplary output: The current weather in Tokyo is Overcast with a temperature of 9.4°C (feels like 6.4°C), relative humidity at 42%, and dew point at -2.9°C. The wind is blowing from the northeast at 3.6 km/h with gusts up to 24.8 km/h. The atmospheric pressure is 1018.9 hPa with 84% cloud cover. Visibility is 24.1 km. 
``` ::: +:::{tab-item} Qwen3.6-35B-A3B +:sync: Qwen3.6-35B-A3B +Vision Language MoE model (35B total / 3B active parameters). Requires OpenVINO 2026.2 or newer and a GPU with sufficient memory to fit the INT4 weights. Tested on a PantherLake iGPU with 32GB RAM (with the iGPU allocation increased) and on a B70 dGPU. + +Pull and start OVMS: +```bat +ovms.exe --rest_port 8000 --source_model OpenVINO/Qwen3.6-35B-A3B-int4-ov --model_repository_path c:\models --reasoning_parser qwen3 --tool_parser qwen3coder --target_device GPU --task text_generation --cache_dir .cache --allowed_media_domains raw.githubusercontent.com +``` + +Use the MCP server with an additional image of the Gdańsk old town. The VLM model deduces the location and calls the `get_weather` tool to summarize the weather conditions in the city. + +```{image} https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg +:alt: poland +:width: 360px +``` + +> **Note**: Image source: [Link](https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg) + +```bat +python openai_agent.py --query "What is the current weather in location depicted in the image?" --image https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2026/1/demos/continuous_batching/agentic_ai/photo.jpeg --model OpenVINO/Qwen3.6-35B-A3B-int4-ov --base-url http://localhost:8000/v3 --mcp-server-url http://localhost:8080/sse --mcp-server weather +``` +::: :::{tab-item} gpt-oss-20b :sync: gpt-oss-20b Pull and start OVMS: @@ -283,6 +305,30 @@ Exemplary output: The current weather in Tokyo is overcast with a temperature of 9.4°C (feels like 6.4°C). The relative humidity is 42%, and the dew point is -2.9°C. Wind is blowing from the northeast at 3.6 km/h, with gusts up to 24.8 km/h. The atmospheric pressure is 1018.9 hPa, and there is 84% cloud cover. Visibility is 24.1 km. ``` ::: +:::{tab-item} Qwen3.6-35B-A3B +:sync: Qwen3.6-35B-A3B +Vision Language MoE model (35B total / 3B active parameters). 
Requires OpenVINO 2026.2 or newer and enough host memory to fit the INT4 weights. Tested on a PantherLake iGPU with 32GB RAM (with the iGPU allocation increased) and on a B70 dGPU. + +Pull and start OVMS: +```bash +mkdir -p ${HOME}/models +docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models openvino/model_server:weekly \ +--rest_port 8000 --source_model OpenVINO/Qwen3.6-35B-A3B-int4-ov --model_repository_path /models --reasoning_parser qwen3 --tool_parser qwen3coder --task text_generation --allowed_media_domains raw.githubusercontent.com +``` + +Use the MCP server with an additional image of the Gdańsk old town. The VLM model deduces the location and calls the `get_weather` tool to summarize the weather conditions in the city. + +```{image} https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg +:alt: poland +:width: 360px +``` + +> **Note**: Image source: [Link](https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg) + +```bash +python openai_agent.py --query "What is the current weather in location depicted in the image?" --image https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2026/1/demos/continuous_batching/agentic_ai/photo.jpeg --model OpenVINO/Qwen3.6-35B-A3B-int4-ov --base-url http://localhost:8000/v3 --mcp-server-url http://localhost:8080/sse --mcp-server weather +``` +::: :::{tab-item} gpt-oss-20b :sync: gpt-oss-20b Pull and start OVMS: @@ -408,6 +454,30 @@ Exemplary output: The current weather in Tokyo is overcast with a temperature of 9.4°C (feels like 6.4°C). The relative humidity is 42%, and the dew point is -2.9°C. Wind is blowing from the northeast at 3.6 km/h, with gusts up to 24.8 km/h. The atmospheric pressure is 1018.9 hPa, and there is 84% cloud cover. Visibility is 24.1 km. ``` ::: +:::{tab-item} Qwen3.6-35B-A3B +:sync: Qwen3.6-35B-A3B +Vision Language MoE model (35B total / 3B active parameters). Requires OpenVINO 2026.2 or newer and a GPU with sufficient memory to fit the INT4 weights. 
Tested on a PantherLake iGPU with 32GB RAM (with the iGPU allocation increased) and on a B70 dGPU. + +Pull and start OVMS: +```bash +mkdir -p ${HOME}/models +docker run -d --user $(id -u):$(id -g) --rm -p 8000:8000 -v ${HOME}/models:/models --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) openvino/model_server:weekly \ +--rest_port 8000 --source_model OpenVINO/Qwen3.6-35B-A3B-int4-ov --model_repository_path /models --reasoning_parser qwen3 --tool_parser qwen3coder --target_device GPU --task text_generation --allowed_media_domains raw.githubusercontent.com +``` + +Use the MCP server with an additional image of the Gdańsk old town. The VLM model deduces the location and calls the `get_weather` tool to summarize the weather conditions in the city. + +```{image} https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg +:alt: poland +:width: 360px +``` + +> **Note**: Image source: [Link](https://images.pexels.com/photos/20015887/pexels-photo-20015887.jpeg) + +```bash +python openai_agent.py --query "What is the current weather in location depicted in the image?" --image https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/releases/2026/1/demos/continuous_batching/agentic_ai/photo.jpeg --model OpenVINO/Qwen3.6-35B-A3B-int4-ov --base-url http://localhost:8000/v3 --mcp-server-url http://localhost:8080/sse --mcp-server weather +``` +::: :::{tab-item} gpt-oss-20b :sync: gpt-oss-20b Pull and start OVMS: diff --git a/docs/deploying_server_baremetal.md b/docs/deploying_server_baremetal.md index 7302d373fb..6f56df1e6d 100644 --- a/docs/deploying_server_baremetal.md +++ b/docs/deploying_server_baremetal.md @@ -15,13 +15,13 @@ You can download model server package in two configurations. 
One with Python sup :sync: ubuntu-22-04 Download precompiled package (without python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_ubuntu22_2026.1.0_python_off.tar.gz -tar -xzvf ovms_ubuntu22_2026.1.0_python_off.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_ubuntu22_2026.2.0_python_off.tar.gz +tar -xzvf ovms_ubuntu22_2026.2.0_python_off.tar.gz ``` or precompiled package (with python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_ubuntu22_2026.1.0_python_on.tar.gz -tar -xzvf ovms_ubuntu22_2026.1.0_python_on.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_ubuntu22_2026.2.0_python_on.tar.gz +tar -xzvf ovms_ubuntu22_2026.2.0_python_on.tar.gz ``` Install required libraries: ```{code} sh @@ -50,13 +50,13 @@ Model server version with Python is shipped with those packages and new installa :sync: ubuntu-24-04 Download precompiled package (without python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_ubuntu24_2026.1.0_python_off.tar.gz -tar -xzvf ovms_ubuntu24_2026.1.0_python_off.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_ubuntu24_2026.2.0_python_off.tar.gz +tar -xzvf ovms_ubuntu24_2026.2.0_python_off.tar.gz ``` or precompiled package (with python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_ubuntu24_2026.1.0_python_on.tar.gz -tar -xzvf ovms_ubuntu24_2026.1.0_python_on.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_ubuntu24_2026.2.0_python_on.tar.gz +tar -xzvf ovms_ubuntu24_2026.2.0_python_on.tar.gz ``` Install required libraries: ```{code} sh @@ -85,13 +85,13 @@ Model server version with Python is shipped with those packages and new installa :sync: rhel-9.6 Download precompiled 
package (without python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_redhat_2026.1.0_python_off.tar.gz -tar -xzvf ovms_redhat_2026.1.0_python_off.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_redhat_2026.2.0_python_off.tar.gz +tar -xzvf ovms_redhat_2026.2.0_python_off.tar.gz ``` or precompiled package (with python): ```{code} sh -wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_redhat_2026.1.0_python_on.tar.gz -tar -xzvf ovms_redhat_2026.1.0_python_on.tar.gz +wget https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_redhat_2026.2.0_python_on.tar.gz +tar -xzvf ovms_redhat_2026.2.0_python_on.tar.gz ``` Install required libraries: ```{code} sh @@ -124,14 +124,14 @@ Make sure you have [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/r Download and unpack model server archive for Windows(with python): ```bat -curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_windows_2026.1.0_python_on.zip -o ovms.zip +curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_windows_2026.2.0_python_on.zip -o ovms.zip tar -xf ovms.zip ``` or archive without python: ```bat -curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_windows_2026.1.0_python_off.zip -o ovms.zip +curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_windows_2026.2.0_python_off.zip -o ovms.zip tar -xf ovms.zip ``` diff --git a/docs/pull_optimum_cli.md b/docs/pull_optimum_cli.md index bcf3159132..8177a7946e 100644 --- a/docs/pull_optimum_cli.md +++ b/docs/pull_optimum_cli.md @@ -15,7 +15,7 @@ mkdir models ## Add optimum-cli to OVMS installation on windows ```bat -curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2026.1/ovms_windows_2026.1.0_python_on.zip -o ovms.zip +curl -L 
https://github.com/openvinotoolkit/model_server/releases/download/v2026.2/ovms_windows_2026.2.0_python_on.zip -o ovms.zip tar -xf ovms.zip ovms\setupvars.bat ovms\python\python -m pip install -r https://raw.githubusercontent.com/openvinotoolkit/model_server/refs/heads/main/demos/common/export_models/requirements.txt