Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ FUZZER_BUILD ?= 0
# - uncomment source build section, comment binary section
# - adjust binary version path - version variable is not passed to WORKSPACE file!

OV_SOURCE_BRANCH ?= 9a5c0f67aa9bfe780972eaa721ccfa082323e9a4 # master branch
OV_TOKENIZERS_BRANCH ?= 85480f170beba3a975cf908bc688a4398424aba8 # master branch
OV_GENAI_BRANCH ?= d93080c377f934a1b4acf371700313cd98f369b9 # master branch
OV_SOURCE_BRANCH ?= b345118847e7d22a9c427ef78be443f83242655f # master branch
OV_TOKENIZERS_BRANCH ?= 933000946429de3f7aeea6fa7be0c07af96240cf # master branch
OV_GENAI_BRANCH ?= 26af9fdd2ac0029b26672eedc778a9a8ff69165d # master branch

OV_SOURCE_ORG ?= openvinotoolkit
OV_GENAI_ORG ?= openvinotoolkit
Expand Down Expand Up @@ -172,11 +172,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu)
ifeq ($(BASE_OS_TAG),24.04)
OS=ubuntu24
INSTALL_DRIVER_VERSION ?= "25.48.36300"
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_ubuntu24_2026.1.0.0.dev20260225_x86_64.tar.gz
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_ubuntu24_2026.1.0.0.dev20260305_x86_64.tar.gz
else ifeq ($(BASE_OS_TAG),22.04)
OS=ubuntu22
INSTALL_DRIVER_VERSION ?= "24.39.31294"
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_ubuntu22_2026.1.0.0.dev20260225_x86_64.tar.gz
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_ubuntu22_2026.1.0.0.dev20260305_x86_64.tar.gz
endif
endif
ifeq ($(BASE_OS),redhat)
Expand All @@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat)
BASE_IMAGE ?= registry.access.redhat.com/ubi9/ubi:$(BASE_OS_TAG_REDHAT)
BASE_IMAGE_RELEASE=registry.access.redhat.com/ubi9/ubi-minimal:$(BASE_OS_TAG_REDHAT)
DIST_OS=redhat
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_rhel8_2026.1.0.0.dev20260225_x86_64.tar.gz # not used
DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_rhel8_2026.1.0.0.dev20260305_x86_64.tar.gz # not used
INSTALL_DRIVER_VERSION ?= "24.52.32224"
endif

Expand Down
4 changes: 2 additions & 2 deletions demos/common/export_models/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ diffusers # for image generation
einops
nncf@git+https://github.com/openvinotoolkit/nncf.git
numpy==2.2.6 # temporary change until ov tokenizers upgrade
openvino-tokenizers==2026.1.0.0.dev20260225
openvino==2026.1.0.dev20260225
openvino-tokenizers==2026.1.0.0.dev20260305
openvino==2026.1.0.dev20260305
pillow
sentence_transformers
sentencepiece # Required by: transformers
Expand Down
48 changes: 43 additions & 5 deletions src/llm/language_model/continuous_batching/llm_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <utility>

Expand All @@ -31,13 +32,15 @@

namespace ovms {
struct LLMExecutor {
bool isDynamicKVCache;
// For logging purposes we could have more information about graph and node here
std::mutex mutex;
std::condition_variable cv;
std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe = nullptr;

LLMExecutor(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe) {
LLMExecutor(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe, bool isDynamicKVCacheSet = false) {
this->pipe = std::move(pipe);
this->isDynamicKVCache = isDynamicKVCacheSet;
}

bool hasRequests() {
Expand All @@ -59,12 +62,47 @@ struct LLMExecutor {
cv.notify_one();
}

// Builds the cache description used in the metrics log line:
// "<dynamic|static> <usage>% of <human readable size>".
// cacheUsage is printed with one digit after the decimal point; cacheBytes is
// rendered through formatBytes(); isCacheDynamic only selects the leading label.
std::string formatCacheInfo(float cacheUsage, size_t cacheBytes, bool isCacheDynamic) {
    // Both cache kinds share the same layout, only the prefix differs.
    const char* const kindLabel = isCacheDynamic ? "dynamic " : "static ";

    std::ostringstream text;
    text << std::fixed << std::setprecision(1);
    text << kindLabel << cacheUsage << "% of " << formatBytes(cacheBytes);
    return text.str();
}

// Renders a byte count as a short human readable string using 1024-based units.
// Counts of at least 1024 are scaled to the largest fitting unit (KB, MB, GB, TB)
// and printed with one digit after the decimal point; smaller counts are printed
// as a whole number followed by " B".
std::string formatBytes(size_t bytes) {
    struct UnitStep {
        double threshold;
        const char* suffix;
    };
    constexpr double kKilo = 1024.0;
    constexpr double kMega = kKilo * 1024.0;
    constexpr double kGiga = kMega * 1024.0;
    constexpr double kTera = kGiga * 1024.0;
    // Ordered from largest to smallest so the first match is the best fitting unit.
    static constexpr UnitStep kSteps[] = {
        {kTera, " TB"},
        {kGiga, " GB"},
        {kMega, " MB"},
        {kKilo, " KB"},
    };

    std::ostringstream text;
    text << std::fixed << std::setprecision(1);

    for (const UnitStep& step : kSteps) {
        if (bytes >= step.threshold) {
            text << (bytes / step.threshold) << step.suffix;
            return text.str();
        }
    }

    // Below one kilobyte: keep the integral count, no fractional part.
    text << bytes << " B";
    return text.str();
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
void printMetrics() {
ov::genai::PipelineMetrics metrics = pipe->get_metrics();
SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache usage {:.1f}%;",
metrics.requests, metrics.scheduled_requests, metrics.cache_usage);
SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache usage {};",
metrics.requests, metrics.scheduled_requests, formatCacheInfo(metrics.cache_usage, metrics.kv_cache_size_in_bytes, this->isDynamicKVCache));
}
};
#pragma GCC diagnostic pop
Expand Down Expand Up @@ -98,8 +136,8 @@ class LLMExecutorWrapper {
}

public:
LLMExecutorWrapper(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe) :
llmExecutor(std::move(pipe)) {
LLMExecutorWrapper(std::shared_ptr<ov::genai::ContinuousBatchingPipeline> pipe, bool isDynamicKVCache = false) :
llmExecutor(std::move(pipe), isDynamicKVCache) {
llmExecutorThread = std::thread(LLMExecutorWrapper::run, &llmExecutor, &finishExecutorThread);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptr<GenAiSe
}
properties->maxModelLength = parseMaxModelLength(parsedModelsPath);

properties->llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(properties->pipeline);
properties->llmExecutorWrapper = std::make_shared<LLMExecutorWrapper>(properties->pipeline, properties->schedulerConfig.cache_size == 0);

return StatusCode::OK;
}
Expand Down
8 changes: 4 additions & 4 deletions windows_install_build_dependencies.bat
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ IF "%OV_USE_BINARY%"=="0" (
::::::::::::::::::::::: GENAI/OPENVINO install from ZIP - reinstalled per build trigger
:: Set default GENAI_PACKAGE_URL if not set
if "%GENAI_PACKAGE_URL%"=="" (
set "GENAI_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_windows_2026.1.0.0.dev20260225_x86_64.zip"
set "GENAI_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_windows_2026.1.0.0.dev20260305_x86_64.zip"
)

:: Extract genai_ver from GENAI_PACKAGE_URL (filename)
Expand Down Expand Up @@ -208,7 +208,7 @@ IF /I EXIST %BAZEL_SHORT_PATH%\openvino (
rmdir /S /Q %BAZEL_SHORT_PATH%\openvino
)
if "%OV_SOURCE_BRANCH%"=="" (
set "OV_SOURCE_BRANCH=9a5c0f67aa9bfe780972eaa721ccfa082323e9a4"
set "OV_SOURCE_BRANCH=b345118847e7d22a9c427ef78be443f83242655f"
)
if "%OV_SOURCE_ORG%"=="" (
set "OV_SOURCE_ORG=openvinotoolkit"
Expand All @@ -217,13 +217,13 @@ if "%TOKENIZER_SOURCE_ORG%"=="" (
set "TOKENIZER_SOURCE_ORG=openvinotoolkit"
)
if "%TOKENIZER_SOURCE_BRANCH%"=="" (
set "TOKENIZER_SOURCE_BRANCH=85480f170beba3a975cf908bc688a4398424aba8"
set "TOKENIZER_SOURCE_BRANCH=933000946429de3f7aeea6fa7be0c07af96240cf"
)
if "%GENAI_SOURCE_ORG%"=="" (
set "GENAI_SOURCE_ORG=openvinotoolkit"
)
if "%GENAI_SOURCE_BRANCH%"=="" (
set "GENAI_SOURCE_BRANCH=d93080c377f934a1b4acf371700313cd98f369b9"
set "GENAI_SOURCE_BRANCH=26af9fdd2ac0029b26672eedc778a9a8ff69165d"
)

echo [INFO] Using OpenVINO source from %OV_SOURCE_ORG%
Expand Down