30 changes: 12 additions & 18 deletions Dockerfile.redhat
@@ -111,8 +111,8 @@ ARG VERBOSE_LOGS=OFF
ARG LTO_ENABLE=OFF

# hadolint ignore=DL3041
RUN dnf install -y https://rpmfind.net/linux/almalinux/8.10/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm && \
dnf update -d6 -y && dnf install -d6 -y \
RUN dnf install -y -d6 \
https://vault.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm \
gdb \
java-11-openjdk-devel \
tzdata-java \
@@ -221,22 +221,16 @@ WORKDIR /openvino_tokenizers/
ARG ov_tokenizers_branch=85be884a69f10270703f81f970a5ee596a4c8df7
ARG ov_tokenizers_org=openvinotoolkit
ARG SDL_OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security -s -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized"
# hadolint ignore=DL3003
RUN git clone https://github.com/$ov_tokenizers_org/openvino_tokenizers.git /openvino_tokenizers && cd /openvino_tokenizers && git checkout $ov_tokenizers_branch && git submodule update --init --recursive
RUN if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
mkdir -p /opt/intel/openvino/python/openvino_tokenizers/lib ; \
cp -r python/* /opt/intel/openvino/python/ ; \
mkdir -p /opt/intel/openvino/python/openvino_tokenizers-2025.4.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-tokenizers\nVersion: 2025.4\nRequires-Python: >=3.9\nRequires-Dist: openvino~=2025.4.1' > /opt/intel/openvino/python/openvino_tokenizers-2025.4.dist-info/METADATA ; \
ln -s /ovms/lib/libopenvino_tokenizers.so /opt/intel/openvino/python/openvino_tokenizers/lib/libopenvino_tokenizers.so ; \
fi

# hadolint ignore=DL3003
RUN if [ "$ov_use_binary" == "0" ]; then true ; else exit 0 ; fi ; \
cd /openvino_tokenizers && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE="${VERBOSE_LOGS}" -DCMAKE_CXX_FLAGS=" ${SDL_OPS} ${LTO_CXX_FLAGS} " -DCMAKE_SHARED_LINKER_FLAGS="${LTO_LD_FLAGS}" -S ./ -B ./build/ && cmake --build ./build/ --parallel $JOBS && cp /openvino_tokenizers/build/src/lib*.so /opt/intel/openvino/runtime/lib/intel64/ ; \
# Install the openvino_tokenizers python bindings and copy to OpenVINO location
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
cp build/python/* /opt/intel/openvino/python/openvino_tokenizers/ ; \
RUN if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
# python tokenizers are always built from source because they are not shipped in the binary package
git clone https://github.com/$ov_tokenizers_org/openvino_tokenizers.git /openvino_tokenizers && cd /openvino_tokenizers && git checkout $ov_tokenizers_branch && git submodule update --init --recursive && \
sed -i '/openvino~=/d' /openvino_tokenizers/pyproject.toml && \
sed -i '/requires-python/d' /openvino_tokenizers/pyproject.toml && \
cd /openvino_tokenizers && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE="${VERBOSE_LOGS}" -DCMAKE_CXX_FLAGS=" ${SDL_OPS} ${LTO_CXX_FLAGS} " -DCMAKE_SHARED_LINKER_FLAGS="${LTO_LD_FLAGS}" -S ./ -B ./build/ && cmake --build ./build/ --parallel $JOBS && cp /openvino_tokenizers/build/src/lib*.so /opt/intel/openvino/runtime/lib/intel64/ && \
python3 -m pip wheel -v --no-deps --wheel-dir wheel /openvino_tokenizers && \
python3 -m pip install --no-cache-dir "$(find wheel -name 'openvino_tokenizers*.whl')" --target /opt/intel/openvino/python ; \
fi
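# For reference, a minimal sanity check of the layout produced above (a sketch, not part of the
# build; assumes python3 is on PATH in this stage and that the OpenVINO Python package already
# sits under /opt/intel/openvino/python):
#   PYTHONPATH=/opt/intel/openvino/python python3 -c "import openvino_tokenizers; print(openvino_tokenizers.__file__)"
# A successful import confirms the wheel installed with --target resolves via PYTHONPATH.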

WORKDIR /openvino_genai/
@@ -289,8 +283,8 @@ WORKDIR /ovms/src/example/SampleCpuExtension/
RUN make

RUN if ! [[ $debug_bazel_flags == *"py_off"* ]]; then true ; else exit 0 ; fi ; \
mkdir -p /opt/intel/openvino/python/openvino-4.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.4' > /opt/intel/openvino/python/openvino-4.dist-info/METADATA
mkdir -p /opt/intel/openvino/python/openvino-2025.4.1.dist-info && \
echo $'Metadata-Version: 1.0\nName: openvino\nVersion: 2025.4.1' > /opt/intel/openvino/python/openvino-2025.4.1.dist-info/METADATA
ENV PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding
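# For reference (a sketch, not part of this change): the hand-written dist-info above makes the
# version discoverable through importlib.metadata, which scans sys.path for *.dist-info folders:
#   PYTHONPATH=/opt/intel/openvino/python python3 -c "from importlib.metadata import version; print(version('openvino'))"
# should report 2025.4.1, matching the METADATA written above.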

WORKDIR /patchelf
2 changes: 1 addition & 1 deletion ci/build_test_OnCommit.groovy
@@ -166,7 +166,7 @@ pipeline {
label "${agent_name_linux}"
}
steps {
sh "make release_image RUN_TESTS=0 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}"
sh "make release_image RUN_TESTS=1 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}"
sh "make run_lib_files_test BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit}"
script {
dir ('internal_tests'){
6 changes: 3 additions & 3 deletions create_package.sh
@@ -67,10 +67,10 @@ fi
# Add Python bindings for pyovms, openvino, openvino_tokenizers and openvino_genai, so they are all available for OVMS Python servables
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then cp -r /opt/intel/openvino/python /ovms_release/lib/python ; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]] && [ "$FUZZER_BUILD" == "0" ]; then mv /ovms_release/lib/pyovms.so /ovms_release/lib/python ; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then echo $'#!/bin/bash\npython3 -m openvino_tokenizers.cli "$@"' > /ovms_release/bin/convert_tokenizer ; \
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mv /ovms_release/lib/python/bin/convert_tokenizer /ovms_release/bin/convert_tokenizer ; \
chmod +x /ovms_release/bin/convert_tokenizer ; fi
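# For reference, a hypothetical invocation of the relocated CLI (model id and output directory
# are illustrative only; assumes the bundled python runtime under lib/python is importable):
#   /ovms_release/bin/convert_tokenizer HuggingFaceTB/SmolLM2-360M-Instruct --with-detokenizer -o /tmp/smollm2_tokenizer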
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mkdir -p /ovms_release/lib/python/openvino_genai-2025.4.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-genai\nVersion: 2025.4\nRequires-Python: >=3.9\nRequires-Dist: openvino-genai~=2025.4.1' > /ovms_release/lib/python/openvino_genai-2025.4.dist-info/METADATA; fi
if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then mkdir -p /ovms_release/lib/python/openvino_genai-2025.4.1.dist-info ; \
echo $'Metadata-Version: 1.0\nName: openvino-genai\nVersion: 2025.4.1\nRequires-Python: >=3.9\nRequires-Dist: openvino-genai~=2025.4.1' > /ovms_release/lib/python/openvino_genai-2025.4.1.dist-info/METADATA; fi

if [ -f /opt/intel/openvino/runtime/lib/intel64/plugins.xml ]; then cp /opt/intel/openvino/runtime/lib/intel64/plugins.xml /ovms_release/lib/ ; fi
find /opt/intel/openvino/runtime/lib/intel64/ -iname '*.mvcmd*' -exec cp -v {} /ovms_release/lib/ \;
6 changes: 3 additions & 3 deletions demos/common/export_models/requirements.txt
@@ -1,12 +1,12 @@
--extra-index-url "https://download.pytorch.org/whl/cpu"
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@aed07975d817c124fd5d45375ac131d4a068b557
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@a484bc6ee1175bbe8868bb53d2c42ab4c4802aa6
accelerate==1.11.0
diffusers==0.35.2 # for image generation
einops==0.8.1
nncf==2.19.0
numpy==2.2.6
openvino-tokenizers==2025.4.0.0
openvino==2025.4.0
openvino-tokenizers==2025.4.1.0
openvino==2025.4.1
#optimum is in dependency list of optimum-intel
pillow==12.0.0
sentence_transformers==5.1.2
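# Typical usage of this file (a sketch; paths assume the repository root, the model id is an example):
#   python3 -m venv .venv && . .venv/bin/activate
#   pip install -r demos/common/export_models/requirements.txt
#   python demos/common/export_models/export_model.py text_generation --source_model HuggingFaceTB/SmolLM2-360M-Instruct --weight-format int8 --model_repository_path models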
2 changes: 1 addition & 1 deletion demos/python_demos/Dockerfile.redhat
@@ -21,6 +21,6 @@ ENV PYTHONPATH=/ovms/lib/python
RUN if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \
$DNF_TOOL install -y python3-pip git
COPY requirements.txt .
RUN BUILD_CUDA_EXT=0 pip3 install -r requirements.txt
RUN pip3 install -r requirements.txt
USER ovms
ENTRYPOINT [ "/ovms/bin/ovms" ]
4 changes: 2 additions & 2 deletions demos/python_demos/requirements.txt
@@ -1,13 +1,13 @@
--extra-index-url "https://download.pytorch.org/whl/cpu"
optimum-intel@git+https://github.com/huggingface/optimum-intel.git
optimum-intel@git+https://github.com/huggingface/optimum-intel.git@a484bc6ee1175bbe8868bb53d2c42ab4c4802aa6
pillow==10.3.0
tritonclient[grpc]==2.57.0 # Required to use batch string serialization/deserialization (4byte length prepend)
numpy<2.0
huggingface_hub==0.32.0
nncf>=2.11.0
sentence_transformers
sentencepiece==0.2.0
transformers<=4.53
transformers<4.56
einops
torchvision
timm==1.0.15
16 changes: 8 additions & 8 deletions docs/deploying_server_baremetal.md
@@ -15,12 +15,12 @@ You can download model server package in two configurations. One with Python sup
:sync: ubuntu-22-04
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu22.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu22.tar.gz
tar -xzvf ovms_ubuntu22.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu22_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu22_python_on.tar.gz
tar -xzvf ovms_ubuntu22_python_on.tar.gz
```
Install required libraries:
@@ -50,12 +50,12 @@ Model server version with Python is shipped with those packages and new installa
:sync: ubuntu-24-04
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu24.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu24.tar.gz
tar -xzvf ovms_ubuntu24.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_ubuntu24_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_ubuntu24_python_on.tar.gz
tar -xzvf ovms_ubuntu24_python_on.tar.gz
```
Install required libraries:
@@ -85,12 +85,12 @@ Model server version with Python is shipped with those packages and new installa
:sync: rhel-9.6
Download precompiled package (without python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_redhat.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_redhat.tar.gz
tar -xzvf ovms_redhat.tar.gz
```
or precompiled package (with python):
```{code} sh
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_redhat_python_on.tar.gz
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_redhat_python_on.tar.gz
tar -xzvf ovms_redhat_python_on.tar.gz
```
Install required libraries:
@@ -124,14 +124,14 @@ Make sure you have [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/r
Download and unpack model server archive for Windows(with python):

```bat
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_windows_python_on.zip -o ovms.zip
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_windows_python_on.zip -o ovms.zip
tar -xf ovms.zip
```

or archive without python:

```bat
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4/ovms_windows_python_off.zip -o ovms.zip
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.4.1/ovms_windows_python_off.zip -o ovms.zip
tar -xf ovms.zip
```
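A quick post-unpack check prints the server version (a sketch; it assumes the archive extracted an `ovms` directory in the current folder with `ovms.exe` inside, mirroring the Linux `ovms/bin/ovms` layout):

```bat
ovms\ovms.exe --version
```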

24 changes: 17 additions & 7 deletions prepare_llm_models.sh
@@ -14,13 +14,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
if [ -z "$1" ]; then
echo "Error: No directory specified."
exit 1
fi

CB_MODEL="facebook/opt-125m"
CB_MODEL="HuggingFaceTB/SmolLM2-360M-Instruct"
FACEBOOK="facebook/opt-125m"
TOKENIZER_FILE="openvino_tokenizer.bin"
LEGACY_MODEL_FILE="1/model.bin"
EMBEDDING_MODEL="thenlper/gte-small"
@@ -29,7 +31,7 @@ VLM_MODEL="OpenGVLab/InternVL2-1B"

# Models for tools testing. Only tokenizers are downloaded.
QWEN3_MODEL="Qwen/Qwen3-8B"
LLAMA3_MODEL="meta-llama/Llama-3.1-8B-Instruct"
LLAMA3_MODEL="unsloth/Llama-3.1-8B-Instruct"
HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B"
PHI4_MODEL="microsoft/Phi-4-mini-instruct"
MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3"
@@ -57,12 +59,10 @@ if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then ec
echo "Downloading LLM testing models to directory $1"
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly"
if [ "$2" = "docker" ]; then
sed -i '/openvino~=/d' /openvino_tokenizers/pyproject.toml
python3 -m pip wheel -v --no-deps --wheel-dir wheel /openvino_tokenizers
python3 -m pip install $(find wheel -name 'openvino_tokenizers*.whl')
python3 -m pip install "optimum-intel"@git+https://github.com/huggingface/optimum-intel.git nncf sentence_transformers==3.1.1
export PATH=$PATH:/opt/intel/openvino/python/bin
python3 -m pip install "optimum-intel"@git+https://github.com/huggingface/optimum-intel.git@75d6b7d3bc9544487e2111a610b59f8d62e0ef89 nncf sentence_transformers einops timm sentencepiece
else
python3.10 -m venv .venv
python3 -m venv .venv
. .venv/bin/activate
pip3 install -U pip
pip3 install -U -r demos/common/export_models/requirements.txt
@@ -79,6 +79,16 @@ if [ ! -f "$1/$CB_MODEL/$TOKENIZER_FILE" ]; then
exit 1
fi

if [ -f "$1/$FACEBOOK/$TOKENIZER_FILE" ]; then
echo "Models file $1/$FACEBOOK/$TOKENIZER_FILE exists. Skipping downloading models."
else
python3 demos/common/export_models/export_model.py text_generation --source_model "$FACEBOOK" --weight-format int8 --model_repository_path $1
fi
if [ ! -f "$1/$FACEBOOK/$TOKENIZER_FILE" ]; then
echo "[ERROR] Models file $1/$FACEBOOK/$TOKENIZER_FILE does not exist."
exit 1
fi
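# Expected layout after export (a sketch; only the tokenizer file is checked above, the other
# file names are assumptions based on export_model.py defaults):
#   $1/facebook/opt-125m/openvino_tokenizer.bin
#   $1/facebook/opt-125m/openvino_model.xml and .bin
#   $1/facebook/opt-125m/graph.pbtxt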

if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then
echo "Model file $1/$VLM_MODEL/$TOKENIZER_FILE exists. Skipping downloading models."
else
6 changes: 6 additions & 0 deletions run_unit_tests.sh
@@ -35,6 +35,12 @@ ${debug_bazel_flags} \
LD_LIBRARY_PATH=/opt/opencv/lib/:/opt/intel/openvino/runtime/lib/intel64/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
PYTHONPATH=/opt/intel/openvino/python:/ovms/bazel-bin/src/python/binding

# if https proxy is set in the environment and file .user.bazelrc doesn't exist yet, add proxy env for bazel test
if [ -n "${HTTPS_PROXY}" ] && [ ! -f .user.bazelrc ] ; then
echo test:linux --test_env https_proxy=${HTTPS_PROXY} >> .user.bazelrc
echo test:linux --test_env http_proxy=${HTTP_PROXY} >> .user.bazelrc
fi
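# Resulting .user.bazelrc sketch, assuming HTTPS_PROXY=http://proxy.example:911 (hypothetical value):
#   test:linux --test_env https_proxy=http://proxy.example:911
#   test:linux --test_env http_proxy=http://proxy.example:911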

# Check if RUN_GPU_TESTS is set and add it to SHARED_OPTIONS
if [ "$RUN_GPU_TESTS" == "1" ]; then
if grep -q "ID=ubuntu" /etc/os-release; then
23 changes: 12 additions & 11 deletions src/test/llm/llmnode_test.cpp
@@ -99,7 +99,7 @@ class LLMFlowHttpTest : public ::testing::Test {
plugin_config_t pluginConfig;
// Setting precision to f32 fails on SPR hosts - to be investigated
// JsonParser::parsePluginConfig("{\"INFERENCE_PRECISION_HINT\":\"f32\"}", pluginConfig);
cbPipe = std::make_shared<ov::genai::ContinuousBatchingPipeline>(getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/facebook/opt-125m"), schedulerConfig, device, pluginConfig, tokenizerPluginConfig);
cbPipe = std::make_shared<ov::genai::ContinuousBatchingPipeline>(getGenericFullPathForSrcTest("/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"), schedulerConfig, device, pluginConfig, tokenizerPluginConfig);
llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(cbPipe);
} catch (const std::exception& e) {
SPDLOG_ERROR("Error during llm node initialization for models_path exception: {}", e.what());
@@ -598,6 +598,7 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonSpaceStopString) {
"stream": false,
"ignore_eos": false,
"max_tokens": 1000,
"temperature": 0,
"stop": " ",
"include_stop_str_in_output": true,
"prompt": " | | | "
@@ -1419,8 +1420,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensWithMaxToke
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1429,7 +1430,7 @@
R"(",
"stream": false,
"seed" : 1,
"max_tokens" : 5,
"max_tokens" : 10,
"messages": [
{
"role": "user",
@@ -1451,8 +1452,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensWithMaxComp
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 25 are tokens from chat template.
for (int i = 0; i < 8191 - 25 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1461,7 +1462,7 @@
R"(",
"stream": false,
"seed" : 1,
"max_completion_tokens": 5,
"max_completion_tokens": 10,
"messages": [
{
"role": "user",
@@ -1483,8 +1484,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsPromptTokensEqualToMaxM
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2048 tokens when model max length is 2048
for (int i = 0; i < 2048; i++) {
// creating prompt that will be tokenized to 8192 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
@@ -1514,8 +1515,8 @@ TEST_P(LLMFlowHttpTestParameterized, unaryChatCompletionsStoppedByMaxModelLength
GTEST_SKIP();
}
std::string prompt;
// creating prompt that will be tokenized to 2044 tokens when model max length is 2048
for (int i = 0; i < 2044; i++) {
// creating prompt that will be tokenized to 8189 tokens when model max length is 8192; 29 are tokens from chat template.
for (int i = 0; i < 8192 - 29 - 3; i++) {
prompt += "hello ";
}
std::string requestBody = R"(
2 changes: 1 addition & 1 deletion src/test/llm/lm_cb_regular.pbtxt
@@ -28,7 +28,7 @@ node {
}
node_options: {
[type.googleapis.com/mediapipe.LLMCalculatorOptions]: {
models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"
cache_size: 1
}
}
2 changes: 1 addition & 1 deletion src/test/llm/lm_legacy_regular.pbtxt
@@ -28,7 +28,7 @@ node {
}
node_options: {
[type.googleapis.com/mediapipe.LLMCalculatorOptions]: {
models_path: "/ovms/src/test/llm_testing/facebook/opt-125m"
models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct"
cache_size: 1
pipeline_type: LM
}
4 changes: 2 additions & 2 deletions src/test/llm/output_parsers/llama3_output_parser_test.cpp
@@ -25,10 +25,10 @@
using namespace ovms;

#ifdef _WIN32
const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\meta-llama\\Llama-3.1-8B-Instruct";
const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\unsloth\\Llama-3.1-8B-Instruct";
#else
// Hardcoded for usage in docker container
const std::string tokenizerPath = "/ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct";
const std::string tokenizerPath = "/ovms/src/test/llm_testing/unsloth/Llama-3.1-8B-Instruct";
#endif

static const ovms::ToolsSchemas_t EMPTY_TOOLS_SCHEMA = {}; // not used for llama3