diff --git a/Makefile b/Makefile index 2492a25f1b..21adad465e 100644 --- a/Makefile +++ b/Makefile @@ -75,9 +75,9 @@ FUZZER_BUILD ?= 0 # - uncomment source build section, comment binary section # - adjust binary version path - version variable is not passed to WORKSPACE file! -OV_SOURCE_BRANCH ?= 9a5c0f67aa9bfe780972eaa721ccfa082323e9a4 # master branch -OV_TOKENIZERS_BRANCH ?= 85480f170beba3a975cf908bc688a4398424aba8 # master branch -OV_GENAI_BRANCH ?= d93080c377f934a1b4acf371700313cd98f369b9 # master branch +OV_SOURCE_BRANCH ?= b345118847e7d22a9c427ef78be443f83242655f # master branch +OV_TOKENIZERS_BRANCH ?= 933000946429de3f7aeea6fa7be0c07af96240cf # master branch +OV_GENAI_BRANCH ?= 26af9fdd2ac0029b26672eedc778a9a8ff69165d # master branch OV_SOURCE_ORG ?= openvinotoolkit OV_GENAI_ORG ?= openvinotoolkit @@ -172,11 +172,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu) ifeq ($(BASE_OS_TAG),24.04) OS=ubuntu24 INSTALL_DRIVER_VERSION ?= "25.48.36300" - DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_ubuntu24_2026.1.0.0.dev20260225_x86_64.tar.gz + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_ubuntu24_2026.1.0.0.dev20260305_x86_64.tar.gz else ifeq ($(BASE_OS_TAG),22.04) OS=ubuntu22 INSTALL_DRIVER_VERSION ?= "24.39.31294" - DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_ubuntu22_2026.1.0.0.dev20260225_x86_64.tar.gz + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_ubuntu22_2026.1.0.0.dev20260305_x86_64.tar.gz endif endif ifeq ($(BASE_OS),redhat) @@ -185,7 +185,7 @@ ifeq ($(BASE_OS),redhat) BASE_IMAGE ?= registry.access.redhat.com/ubi9/ubi:$(BASE_OS_TAG_REDHAT) 
BASE_IMAGE_RELEASE=registry.access.redhat.com/ubi9/ubi-minimal:$(BASE_OS_TAG_REDHAT) DIST_OS=redhat - DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_rhel8_2026.1.0.0.dev20260225_x86_64.tar.gz # not used + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_rhel8_2026.1.0.0.dev20260305_x86_64.tar.gz # not used INSTALL_DRIVER_VERSION ?= "24.52.32224" endif diff --git a/demos/common/export_models/requirements.txt b/demos/common/export_models/requirements.txt index ed5df8b99c..2cf50b1e1c 100644 --- a/demos/common/export_models/requirements.txt +++ b/demos/common/export_models/requirements.txt @@ -9,8 +9,8 @@ diffusers # for image generation einops nncf@git+https://github.com/openvinotoolkit/nncf.git numpy==2.2.6 # temporary change until ov tokenizers upgrade -openvino-tokenizers==2026.1.0.0.dev20260225 -openvino==2026.1.0.dev20260225 +openvino-tokenizers==2026.1.0.0.dev20260305 +openvino==2026.1.0.dev20260305 pillow sentence_transformers sentencepiece # Required by: transformers` diff --git a/src/llm/language_model/continuous_batching/llm_executor.hpp b/src/llm/language_model/continuous_batching/llm_executor.hpp index f396d78dc3..3ca9efa30e 100644 --- a/src/llm/language_model/continuous_batching/llm_executor.hpp +++ b/src/llm/language_model/continuous_batching/llm_executor.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -31,13 +32,15 @@ namespace ovms { struct LLMExecutor { + bool isDynamicKVCache; // For logging purposes we could have more information about graph and node here std::mutex mutex; std::condition_variable cv; std::shared_ptr pipe = nullptr; - LLMExecutor(std::shared_ptr pipe) { + LLMExecutor(std::shared_ptr pipe, bool isDynamicKVCacheSet = false) { this->pipe = std::move(pipe); + this->isDynamicKVCache = isDynamicKVCacheSet; } bool hasRequests() { 
@@ -59,12 +62,47 @@ struct LLMExecutor {
         cv.notify_one();
     }
 
+    // Builds the cache part of the metrics log line, e.g. "dynamic 12.5% of 1.0 GB".
+    // NOTE(review): std::setprecision needs <iomanip>; include names are not visible in this patch - confirm.
+    static std::string formatCacheInfo(float cacheUsage, size_t cacheBytes, bool isCacheDynamic) {
+        std::ostringstream oss;
+        oss << std::fixed << std::setprecision(1);
+        oss << (isCacheDynamic ? "dynamic " : "static ") << cacheUsage << "% of " << formatBytes(cacheBytes);
+        return oss.str();
+    }
+
+    // Renders a byte count with 1024-based units (B, KB, MB, GB, TB) and one decimal;
+    // counts below 1 KB are printed as a whole number of bytes.
+    static std::string formatBytes(size_t bytes) {
+        static constexpr const char* UNITS[] = {"B", "KB", "MB", "GB", "TB"};
+        constexpr size_t LAST_UNIT = sizeof(UNITS) / sizeof(UNITS[0]) - 1;
+        constexpr double STEP = 1024.0;
+        // Values from 1023.95 upwards would print as "1024.0" at one decimal, so they are
+        // promoted to the next unit (e.g. "1.0 MB" rather than "1024.0 KB").
+        constexpr double PROMOTE_AT = 1023.95;
+
+        std::ostringstream oss;
+        if (bytes < 1024) {
+            oss << bytes << " B";
+            return oss.str();
+        }
+
+        double value = static_cast<double>(bytes);
+        size_t unit = 0;
+        while (unit < LAST_UNIT && value >= PROMOTE_AT) {
+            value /= STEP;
+            ++unit;
+        }
+        oss << std::fixed << std::setprecision(1) << value << ' ' << UNITS[unit];
+        return oss.str();
+    }
+
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
     void printMetrics() {
         ov::genai::PipelineMetrics metrics = pipe->get_metrics();
-        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache usage {:.1f}%;",
-            metrics.requests, metrics.scheduled_requests, metrics.cache_usage);
+        SPDLOG_LOGGER_INFO(llm_executor_logger, "All requests: {}; Scheduled requests: {}; Cache usage {};",
+            metrics.requests, metrics.scheduled_requests, formatCacheInfo(metrics.cache_usage, metrics.kv_cache_size_in_bytes, this->isDynamicKVCache));
     }
 };
 #pragma GCC diagnostic pop
@@ -98,8 +136,8 @@ class LLMExecutorWrapper {
     }
 
 public:
-    LLMExecutorWrapper(std::shared_ptr pipe) :
-        llmExecutor(std::move(pipe)) {
+    LLMExecutorWrapper(std::shared_ptr pipe, bool isDynamicKVCache = false) :
+        llmExecutor(std::move(pipe), isDynamicKVCache) {
         llmExecutorThread = std::thread(LLMExecutorWrapper::run, &llmExecutor, &finishExecutorThread);
     }
 
diff --git
a/src/llm/language_model/continuous_batching/servable_initializer.cpp b/src/llm/language_model/continuous_batching/servable_initializer.cpp index 9b2018b86d..27f4f51aee 100644 --- a/src/llm/language_model/continuous_batching/servable_initializer.cpp +++ b/src/llm/language_model/continuous_batching/servable_initializer.cpp @@ -223,7 +223,7 @@ Status ContinuousBatchingServableInitializer::initialize(std::shared_ptrmaxModelLength = parseMaxModelLength(parsedModelsPath); - properties->llmExecutorWrapper = std::make_shared(properties->pipeline); + properties->llmExecutorWrapper = std::make_shared(properties->pipeline, properties->schedulerConfig.cache_size == 0); return StatusCode::OK; } diff --git a/windows_install_build_dependencies.bat b/windows_install_build_dependencies.bat index 44b268123a..11f91e1734 100644 --- a/windows_install_build_dependencies.bat +++ b/windows_install_build_dependencies.bat @@ -155,7 +155,7 @@ IF "%OV_USE_BINARY%"=="0" ( ::::::::::::::::::::::: GENAI/OPENVINO install from ZIP - reinstalled per build trigger :: Set default GENAI_PACKAGE_URL if not set if "%GENAI_PACKAGE_URL%"=="" ( - set "GENAI_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260225/openvino_genai_windows_2026.1.0.0.dev20260225_x86_64.zip" + set "GENAI_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2026.1.0.0.dev20260305/openvino_genai_windows_2026.1.0.0.dev20260305_x86_64.zip" ) :: Extract genai_ver from GENAI_PACKAGE_URL (filename) @@ -208,7 +208,7 @@ IF /I EXIST %BAZEL_SHORT_PATH%\openvino ( rmdir /S /Q %BAZEL_SHORT_PATH%\openvino ) if "%OV_SOURCE_BRANCH%"=="" ( - set "OV_SOURCE_BRANCH=9a5c0f67aa9bfe780972eaa721ccfa082323e9a4" + set "OV_SOURCE_BRANCH=b345118847e7d22a9c427ef78be443f83242655f" ) if "%OV_SOURCE_ORG%"=="" ( set "OV_SOURCE_ORG=openvinotoolkit" @@ -217,13 +217,13 @@ if "%TOKENIZER_SOURCE_ORG%"=="" ( set "TOKENIZER_SOURCE_ORG=openvinotoolkit" ) if 
"%TOKENIZER_SOURCE_BRANCH%"=="" ( - set "TOKENIZER_SOURCE_BRANCH=85480f170beba3a975cf908bc688a4398424aba8" + set "TOKENIZER_SOURCE_BRANCH=933000946429de3f7aeea6fa7be0c07af96240cf" ) if "%GENAI_SOURCE_ORG%"=="" ( set "GENAI_SOURCE_ORG=openvinotoolkit" ) if "%GENAI_SOURCE_BRANCH%"=="" ( - set "GENAI_SOURCE_BRANCH=d93080c377f934a1b4acf371700313cd98f369b9" + set "GENAI_SOURCE_BRANCH=26af9fdd2ac0029b26672eedc778a9a8ff69165d" ) echo [INFO] Using OpenVINO source from %OV_SOURCE_ORG%