30 changes: 12 additions & 18 deletions Dockerfile.redhat
@@ -111,8 +111,8 @@ ARG VERBOSE_LOGS=OFF
ARG LTO_ENABLE=OFF

# hadolint ignore=DL3041
RUN dnf install -y https://rpmfind.net/linux/almalinux/8.10/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm && \
dnf update -d6 -y && dnf install -d6 -y \
RUN dnf install -y -d6 \
https://vault.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm \
gdb \
java-11-openjdk-devel \
tzdata-java \
@@ -221,22 +221,16 @@ WORKDIR /openvino_tokenizers/
ARG ov_tokenizers_branch=85be884a69f10270703f81f970a5ee596a4c8df7
ARG ov_tokenizers_org=openvinotoolkit
ARG SDL_OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security -s -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized"
# hadolint ignore=DL3003
RUN git clone https://github.com/$ov_tokenizers_org/openvino_tokenizers.git /openvino_tokenizers && cd /openvino_tokenizers && git checkout $ov_tokenizers_branch && git submodule update --init --recursive
RUN if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
mkdir -p /opt/intel/openvino/python/openvino_tokenizers/lib ; \
cp -r python/* /opt/intel/openvino/python/ ; \
mkdir -p /opt/intel/openvino/python/openvino_tokenizers-2025.4.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-tokenizers\nVersion: 2025.4\nRequires-Python: >=3.9\nRequires-Dist: openvino~=2025.4.1' > /opt/intel/openvino/python/openvino_tokenizers-2025.4.dist-info/METADATA ; \
ln -s /ovms/lib/libopenvino_tokenizers.so /opt/intel/openvino/python/openvino_tokenizers/lib/libopenvino_tokenizers.so ; \
fi

# hadolint ignore=DL3003
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; \
cd /openvino_tokenizers && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE="${VERBOSE_LOGS}" -DCMAKE_CXX_FLAGS=" ${SDL_OPS} ${LTO_CXX_FLAGS} " -DCMAKE_SHARED_LINKER_FLAGS="${LTO_LD_FLAGS}" -S ./ -B ./build/ && cmake --build ./build/ --parallel $JOBS && cp /openvino_tokenizers/build/src/lib*.so /opt/intel/openvino/runtime/lib/intel64/ ; \
# Install the openvino_tokenizers python bindings and copy to OpenVINO location
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
cp build/python/* /opt/intel/openvino/python/openvino_tokenizers/ ; \
RUN if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
# python tokenizers are always built from source because they are not shipped in the binary package
git clone https://github.com/$ov_tokenizers_org/openvino_tokenizers.git /openvino_tokenizers && cd /openvino_tokenizers && git checkout $ov_tokenizers_branch && git submodule update --init --recursive && \
sed -i '/openvino~=/d' /openvino_tokenizers/pyproject.toml && \
sed -i '/requires-python/d' /openvino_tokenizers/pyproject.toml && \
cd /openvino_tokenizers && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE="${VERBOSE_LOGS}" -DCMAKE_CXX_FLAGS=" ${SDL_OPS} ${LTO_CXX_FLAGS} " -DCMAKE_SHARED_LINKER_FLAGS="${LTO_LD_FLAGS}" -S ./ -B ./build/ && cmake --build ./build/ --parallel $JOBS && cp /openvino_tokenizers/build/src/lib*.so /opt/intel/openvino/runtime/lib/intel64/ && \
python3 -m pip wheel -v --no-deps --wheel-dir wheel /openvino_tokenizers && \
python3 -m pip install --no-cache-dir "$(find wheel -name 'openvino_tokenizers*.whl')" --target /opt/intel/openvino/python ; \
fi
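# For reference, a minimal sanity check of the layout produced above (a sketch, not part of the
# build; assumes python3 is on PATH in this stage and that the OpenVINO Python package already
# sits under /opt/intel/openvino/python):
#   PYTHONPATH=/opt/intel/openvino/python python3 -c "import openvino_tokenizers; print(openvino_tokenizers.__file__)"
# A successful import confirms the wheel installed with --target resolves via PYTHONPATH.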

WORKDIR /openvino_genai/
@@ -289,8 +283,8 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
RUN make

RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
mkdir -p /opt/intel/openvino/python/openvino-4.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.4' > /opt/intel/openvino/python/openvino-4.dist-info/METADATA
mkdir -p /opt/intel/openvino/python/openvino-2025.4.1.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.4.1' > /opt/intel/openvino/python/openvino-2025.4.1.dist-info/METADATA
ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding
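# For reference (a sketch, not part of this change): the hand-written dist-info above makes the
# version discoverable through importlib.metadata, which scans sys.path for *.dist-info folders:
#   PYTHONPATH=/opt/intel/openvino/python python3 -c "from importlib.metadata import version; print(version('openvino'))"
# should report 2025.4.1, matching the METADATA written above.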

WORKDIR /patchelf
2 changes: 1 addition & 1 deletion ci/build_test_OnCommit.groovy
@@ -166,7 +166,7 @@ pipeline {
label "${agent_name_linux}"
}
steps {
sh "make release_image RUN_TESTS=0 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}"
sh "make release_image RUN_TESTS=1 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}"
sh "make run_lib_files_test BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit}"
script {
dir ('internal_tests'){
6 changes: 3 additions & 3 deletions create_package.sh
@@ -67,10 +67,10 @@ fi
# Add Python bindings for pyovms, openvino, openvino_tokenizers and openvino_genai, so they are all available for OVMS Python servables
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then cp -r /opt/intel/openvino/python /ovms_release/lib/python ; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]] && [ "$FUZZER_BUILD" == "0" ]; then mv /ovms_release/lib/pyovms.so /ovms_release/lib/python ; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then echo $'#!/bin/bash\npython3 -m openvino_tokenizers.cli "$@"' > /ovms_release/bin/convert_tokenizer ; \
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mv /ovms_release/lib/python/bin/convert_tokenizer /ovms_release/bin/convert_tokenizer ; \
chmod +x /ovms_release/bin/convert_tokenizer ; fi
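# For reference, a hypothetical invocation of the relocated CLI (model id and output directory
# are illustrative only; assumes the bundled python runtime under lib/python is importable):
#   /ovms_release/bin/convert_tokenizer HuggingFaceTB/SmolLM2-360M-Instruct --with-detokenizer -o /tmp/smollm2_tokenizer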
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mkdir -p /ovms_release/lib/python/openvino_genai-2025.4.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-genai\nVersion: 2025.4\nRequires-Python: >=3.9\nRequires-Dist: openvino-genai~=2025.4.1' > /ovms_release/lib/python/openvino_genai-2025.4.dist-info/METADATA; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mkdir -p /ovms_release/lib/python/openvino_genai-2025.4.1.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-genai\nVersion: 2025.4.1\nRequires-Python: >=3.9\nRequires-Dist: openvino-genai~=2025.4.1' > /ovms_release/lib/python/openvino_genai-2025.4.1.dist-info/METADATA; fi

if [ -f /opt/intel/openvino/runtime/lib/intel64/plugins.xml ]; then cp /opt/intel/openvino/runtime/lib/intel64/plugins.xml /ovms_release/lib/ ; fi
find /opt/intel/openvino/runtime/lib/intel64/ -iname '*.mvcmd*' -exec cp -v {} /ovms_release/lib/ \;
6 changes: 3 additions & 3 deletions demos/common/export_models/requirements.txt
@@ -1,12 +1,12 @@
--extra-index-url "https://download.pytorch.org/whl/cpu"
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@aed07975d817c124fd5d45375ac131d4a068b557
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@a484bc6ee1175bbe8868bb53d2c42ab4c4802aa6
accelerate==1.11.0
diffusers==0.35.2 # for image generation
einops==0.8.1
nncf==2.19.0
numpy==2.2.6
openvino-tokenizers==2025.4.0.0
openvino==2025.4.0
openvino-tokenizers==2025.4.1.0
openvino==2025.4.1
#optimum is in dependency list of optimum-intel
pillow==12.0.0
sentence_transformers==5.1.2
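# Typical usage of this file (a sketch; paths assume the repository root, the model id is an example):
#   python3 -m venv .venv && . .venv/bin/activate
#   pip install -r demos/common/export_models/requirements.txt
#   python demos/common/export_models/export_model.py text_generation --source_model HuggingFaceTB/SmolLM2-360M-Instruct --weight-format int8 --model_repository_path models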
2 changes: 1 addition & 1 deletion demos/python_demos/Dockerfile.redhat
@@ -21,6 +21,6 @@ ENV PYTHONPATH=/ovms/lib/python
RUN if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \
$DNF_TOOL install -y python3-pip git
COPY requirements.txt .
RUN BUILD_CUDA_EXT=0 pip3 install -r requirements.txt
RUN pip3 install -r requirements.txt
USER ovms
ENTRYPOINT [ "/ovms/bin/ovms" ]
4 changes: 2 additions & 2 deletions demos/python_demos/requirements.txt
@@ -1,13 +1,13 @@
--extra-index-url "https://download.pytorch.org/whl/cpu"
optimum-intel@git+https://github.com/huggingface/optimum-intel.git
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@a484bc6ee1175bbe8868bb53d2c42ab4c4802aa6
pillow==10.3.0
tritonclient[grpc]==2.57.0 # Required to use batch string serialization/deserialization (4byte length prepend)
numpy<2.0
huggingface_hub==0.32.0
nncf>=2.11.0
sentence_transformers
sentencepiece==0.2.0
transformers<=4.53
transformers<4.56
einops
torchvision
timm==1.0.15
16 changes: 8 additions & 8 deletions docs/deploying_server_baremetal.md
@@ -15,12 +15,12 @@ You can download model server package in two configurations. One with Python sup
:sync: ubuntu-22-04
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu22.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu22.tar.gz
tar -xzvf ovms_ubuntu22.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu22_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu22_python_on.tar.gz
tar -xzvf ovms_ubuntu22_python_on.tar.gz
```
Install required libraries:
@@ -50,12 +50,12 @@ Model server version with Python is shipped with those packages and new installa
:sync: ubuntu-24-04
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu24.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu24.tar.gz
tar -xzvf ovms_ubuntu24.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu24_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu24_python_on.tar.gz
tar -xzvf ovms_ubuntu24_python_on.tar.gz
```
Install required libraries:
@@ -85,12 +85,12 @@ Model server version with Python is shipped with those packages and new installa
:sync: rhel-9.6
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_redhat.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_redhat.tar.gz
tar -xzvf ovms_redhat.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_redhat_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_redhat_python_on.tar.gz
tar -xzvf ovms_redhat_python_on.tar.gz
```
Install required libraries:
@@ -124,14 +124,14 @@ Make sure you have [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/r
Download and unpack model server archive for Windows(with python):

```bat
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_windows_python_on.zip -o ovms.zip
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_windows_python_on.zip -o ovms.zip
tar -xf ovms.zip
```

or archive without python:

```bat
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_windows_python_off.zip -o ovms.zip
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_windows_python_off.zip -o ovms.zip
tar -xf ovms.zip
```
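A quick post-unpack check prints the server version (a sketch; it assumes the archive extracted an `ovms` directory in the current folder with `ovms.exe` inside, mirroring the Linux `ovms/bin/ovms` layout):

```bat
ovms\ovms.exe --version
```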

24 changes: 17 additions & 7 deletions prepare_llm_models.sh
@@ -14,13 +14,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
if [ -z "$1" ]; then
echo "Error: No directory specified."
exit 1
fi

CB_MODEL="facebook/opt-125m"
CB_MODEL="HuggingFaceTB/SmolLM2-360M-Instruct"
FACEBOOK="facebook/opt-125m"
TOKENIZER_FILE="openvino_tokenizer.bin"
LEGACY_MODEL_FILE="1/model.bin"
EMBEDDING_MODEL="thenlper/gte-small"
@@ -29,7 +31,7 @@ VLM_MODEL="OpenGVLab/InternVL2-1B"

# Models for tools testing. Only tokenizers are downloaded.
QWEN3_MODEL="Qwen/Qwen3-8B"
LLAMA3_MODEL="meta-llama/Llama-3.1-8B-Instruct"
LLAMA3_MODEL="unsloth/Llama-3.1-8B-Instruct"
HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
PHI4_MODEL="microsoft/Phi-4-mini-instruct"
MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3"
@@ -57,12 +59,10 @@ if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then ec
echo "Downloading LLM testing models to directory $1"
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly"
if [ "$2" = "docker" ]; then
sed -i '/openvino~=/d' /openvino_tokenizers/pyproject.toml
python3 -m pip wheel -v --no-deps --wheel-dir wheel /openvino_tokenizers
python3 -m pip install $(find wheel -name 'openvino_tokenizers*.whl')
python3 -m pip install "optimum-intel"@git+https://github.com/huggingface/optimum-intel.git nncf sentence_transformers==3.1.1
export PATH=$PATH:/opt/intel/openvino/python/bin
python3 -m pip install "optimum-intel"@git+https://github.com/huggingface/optimum-intel.git@75d6b7d3bc9544487e2111a610b59f8d62e0ef89 nncf sentence_transformers einops timm sentencepiece
else
python3.10 -m venv .venv
python3 -m venv .venv
. .venv/bin/activate
pip3 install -U pip
pip3 install -U -r demos/common/export_models/requirements.txt
@@ -79,6 +79,16 @@ if [ ! -f "$1/$CB_MODEL/$TOKENIZER_FILE" ]; then
exit 1
fi

if [ -f "$1/$FACEBOOK/$TOKENIZER_FILE" ]; then
echo "Models file $1/$FACEBOOK/$TOKENIZER_FILE exists. Skipping downloading models."
else
python3 demos/common/export_models/export_model.py text_generation --source_model "$FACEBOOK" --weight-format int8 --model_repository_path $1
fi
if [ ! -f "$1/$FACEBOOK/$TOKENIZER_FILE" ]; then
echo "[ERROR] Models file $1/$FACEBOOK/$TOKENIZER_FILE does not exist."
exit 1
fi
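# Expected layout after export (a sketch; only the tokenizer file is checked above, the other
# file names are assumptions based on export_model.py defaults):
#   $1/facebook/opt-125m/openvino_tokenizer.bin
#   $1/facebook/opt-125m/openvino_model.xml and .bin
#   $1/facebook/opt-125m/graph.pbtxt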

if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then
echo "Model file $1/$VLM_MODEL/$TOKENIZER_FILE exists. Skipping downloading models."
else
6 changes: 6 additions & 0 deletions run_unit_tests.sh
@@ -35,6 +35,12 @@ ${debug_bazel_flags} \
LD_LIBRARY_PATH=/opt/opencv/lib/:/opt/intel/openvino/runtime/lib/intel64/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding

# if https proxy is set in the environment and file .user.bazelrc doesn't exist yet, add proxy env for bazel test
if [ -n "${HTTPS_PROXY}" ] && [ ! -f .user.bazelrc ] ; then
echo test:linux --test_env https_proxy=${HTTPS_PROXY} >> .user.bazelrc
echo test:linux --test_env http_proxy=${HTTP_PROXY} >> .user.bazelrc
fi
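# Resulting .user.bazelrc sketch, assuming HTTPS_PROXY=http://proxy.example:911 (hypothetical value):
#   test:linux --test_env https_proxy=http://proxy.example:911
#   test:linux --test_env http_proxy=http://proxy.example:911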

# Check if RUN_GPU_TESTS is set and add it to SHARED_OPTIONS
if [ "$RUN_GPU_TESTS" == "1" ]; then
if grep -q "ID=ubuntu" /etc/os-release; then
23 changes: 12 additions & 11 deletions src/test/llm/llmnode_test.cpp
@@ -99,7 +99,7 @@ class LLMFlowHttpTest : public ::testing::Test {
plugin_config_t pluginConfig;
// Setting precision to f32 fails on SPR hosts - to be investigated
// JsonParser::parsePluginConfig("{\"INFERENCE_PRECISION_HINT\":\"f32\"}", pluginConfig);
cbPipe = std::make_shared<ov::genai::ContinuousBatchingPipeline>(getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/facebook/opt-125m"), schedulerConfig, device, pluginConfig, tokenizerPluginConfig);
cbPipe = std::make_shared<ov::genai::ContinuousBatchingPipeline>(getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"), schedulerConfig, device, pluginConfig, tokenizerPluginConfig);
llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(cbPipe);
} catch (const std::exception& e) {
SPDLOG_ERROR("Error during llm node initialization for models_path exception: {}", e.what());
@@ -598,6 +598,7 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonSpaceStopString) {
"stream": false,
"ignore_eos": false,
"max_tokens": 1000,
"temperature": 0,
"stop": " ",
"include_stop_str_in_output": true,
"prompt": " | | | "
@@ -1419,8 +1420,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensWithMaxToke
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1429,7 +1430,7 @@
R"(",
"stream": false,
"seed" : 1,
"max_tokens" : 5,
"max_tokens" : 10,
"messages": [
{
"role": "user",
@@ -1451,8 +1452,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensWithMaxComp
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 25 are tokens from chat template.
for (int i = 0; i < 8191 - 25 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1461,7 +1462,7 @@
R"(",
"stream": false,
"seed" : 1,
"max_completion_tokens": 5,
"max_completion_tokens": 10,
"messages": [
{
"role": "user",
@@ -1483,8 +1484,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensEqualToMaxM
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2048; i++) {
// creating prompt that will be tokenized to 8192 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1514,8 +1515,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsStoppedByMaxModelLength
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2044 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
2 changes: 1 addition & 1 deletion src/test/llm/lm_cb_regular.pbtxt
@@ -28,7 +28,7 @@ node {
}
node_options: {
[type.googleapis.com/mediapipe.LLMCalculatorOptions]: {
models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"
cache_size: 1
}
}
2 changes: 1 addition & 1 deletion src/test/llm/lm_legacy_regular.pbtxt
@@ -28,7 +28,7 @@ node {
}
node_options: {
[type.googleapis.com/mediapipe.LLMCalculatorOptions]: {
models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"
cache_size: 1
pipeline_type: LM
}
4 changes: 2 additions & 2 deletions src/test/llm/output_parsers/llama3_output_parser_test.cpp
@@ -25,10 +25,10 @@
using namespace ovms;

#ifdef _WIN32
const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\meta-llama\\Llama-3.1-8B-Instruct";
const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\unsloth\\Llama-3.1-8B-Instruct";
#else
// Hardcoded for usage in docker container
const std::string tokenizerPath = "/ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct";
const std::string tokenizerPath = "/ovms/src/test/llm_testing/unsloth/Llama-3.1-8B-Instruct";
#endif

static const ovms::ToolsSchemas_t EMPTY_TOOLS_SCHEMA = {}; // not used for llama3