From ef5a10ec7f38ca522130ce64762ff9cf2230829e Mon Sep 17 00:00:00 2001 From: Tony Zlatinski Date: Thu, 27 Mar 2025 18:35:37 -0500 Subject: [PATCH 01/14] common: Modify the output filename extension based on the type --- .../libs/VkCodecUtils/VkVideoFrameToFile.cpp | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index cb71ccb6..846a0890 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -265,15 +265,44 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } } - FILE* AttachFile(const char* fileName) { + bool hasExtension(const char* fileName, const char* extension) { + size_t fileLen = std::strlen(fileName); + size_t extLen = std::strlen(extension); + + if (fileLen < extLen) { + return false; + } + + return std::strcmp(fileName + fileLen - extLen, extension) == 0; + } + + FILE* AttachFile(const char* fileName, bool y4mFormat) { if (m_outputFile) { fclose(m_outputFile); m_outputFile = nullptr; } + std::string fileNameWithModExt; + // Check if the file does not have a y4m extension, + // but y4m format is requested. + if (y4mFormat && !hasExtension(fileName, ".y4m")) { + std::cout << std::endl << "y4m output format is requested, "; + std::cout << "but the output file's (" << fileName << ") extension isn't .y4m!" + << std::endl; + fileNameWithModExt = fileName + std::string(".y4m"); + fileName = fileNameWithModExt.c_str(); + } else if (!hasExtension(fileName, ".yuv")) { + std::cout << std::endl << "Raw yuv output format is requested, "; + std::cout << "but the output file's (" << fileName << ") extension isn't .yuv!" 
+ << std::endl; + fileNameWithModExt = fileName + std::string(".yuv"); + fileName = fileNameWithModExt.c_str(); + } + if (fileName != nullptr) { m_outputFile = fopen(fileName, "wb"); if (m_outputFile) { + std::cout << "Output file name is: " << fileName << std::endl; return m_outputFile; } } @@ -568,7 +597,7 @@ VkResult VkVideoFrameOutput::Create(const char* fileName, return VK_ERROR_OUT_OF_HOST_MEMORY; } - FILE* outFile = newFrameToFile->AttachFile(fileName); + FILE* outFile = newFrameToFile->AttachFile(fileName, outputy4m); if ((fileName != nullptr) && (outFile == nullptr)) { delete newFrameToFile; return VK_ERROR_INITIALIZATION_FAILED; From 892cb87b2cccf08eebff83ed775e4a5639ca4d99 Mon Sep 17 00:00:00 2001 From: Tony Zlatinski Date: Mon, 12 May 2025 08:15:31 -0500 Subject: [PATCH 02/14] encode: remove the CPU input conversion function Compute filter: Add support for buffer as input Compute filter: Add support for in/out plane mismatch Compute filter: Add 10/12-bit shift support Compute filter: Add support for 4:4:4, 4:2:2 and 4:2:0 --- .../include/nvidia_utils/vulkan/ycbcr_utils.h | 18 + common/libs/VkCodecUtils/FrameProcessor.h | 2 +- .../VkCodecUtils/VulkanFilterYuvCompute.cpp | 2756 ++++++++++++++++- .../VkCodecUtils/VulkanFilterYuvCompute.h | 475 ++- .../libs/VkVideoDecoder/VkVideoDecoder.cpp | 22 +- .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 224 +- .../libs/VkVideoEncoder/VkVideoEncoder.h | 23 + 7 files changed, 3039 insertions(+), 481 deletions(-) diff --git a/common/include/nvidia_utils/vulkan/ycbcr_utils.h b/common/include/nvidia_utils/vulkan/ycbcr_utils.h index 7713c1e7..46f3ed78 100644 --- a/common/include/nvidia_utils/vulkan/ycbcr_utils.h +++ b/common/include/nvidia_utils/vulkan/ycbcr_utils.h @@ -103,6 +103,24 @@ typedef struct YcbcrPlanesLayoutInfo { uint8_t reserved; // reserved for structure alignment.
} YcbcrPlanesLayoutInfo; +static inline uint32_t GetBitsPerChannel(const YcbcrPlanesLayoutInfo& pYcbcrPlanesLayoutInfo) +{ + switch (pYcbcrPlanesLayoutInfo.bpp) { + case YCBCRA_8BPP: + return 8; + case YCBCRA_10BPP: + return 10; + case YCBCRA_12BPP: + return 12; + case YCBCRA_14BPP: + return 14; + case YCBCRA_16BPP: + return 16; + default: + return 8; + } +} + static inline size_t YcbcrAlign(size_t toAlign, size_t alignment) { return ((toAlign + (alignment - 1)) & ~(alignment -1)); diff --git a/common/libs/VkCodecUtils/FrameProcessor.h b/common/libs/VkCodecUtils/FrameProcessor.h index 8a94f6ab..097a3fa6 100644 --- a/common/libs/VkCodecUtils/FrameProcessor.h +++ b/common/libs/VkCodecUtils/FrameProcessor.h @@ -106,7 +106,7 @@ class FrameProcessor : public VkVideoRefCountBase { FrameProcessor(bool verbose = false) : m_frameCount(0) , m_profileFramesCount(0) - , m_displayTimePeriodMilliseconds(1000) + , m_displayTimePeriodMilliseconds(100) , start_time (std::chrono::steady_clock::now()) , m_verbose(verbose) { diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp index dd67b2b5..906cc229 100644 --- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp +++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp @@ -17,7 +17,7 @@ #include "VulkanFilterYuvCompute.h" #include "nvidia_utils/vulkan/ycbcrvkinfo.h" -static bool dumpShaders = false; +static bool dumpShaders = true; VkResult VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx, uint32_t queueFamilyIndex, @@ -26,6 +26,8 @@ VkResult VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx, uint32_t maxNumFrames, VkFormat inputFormat, VkFormat outputFormat, + bool inputEnableMsbToLsbShift, + bool outputEnableLsbToMsbShift, const VkSamplerYcbcrConversionCreateInfo* pYcbcrConversionCreateInfo, const YcbcrPrimariesConstants* pYcbcrPrimariesConstants, const VkSamplerCreateInfo* pSamplerCreateInfo, @@ -39,6 +41,8 @@ VkResult 
VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx, maxNumFrames, inputFormat, outputFormat, + inputEnableMsbToLsbShift, + outputEnableLsbToMsbShift, pYcbcrPrimariesConstants)); if (!yCbCrVulkanFilter) { @@ -116,34 +120,58 @@ VkResult VulkanFilterYuvCompute::Init(const VkSamplerYcbcrConversionCreateInfo* VkResult VulkanFilterYuvCompute::InitDescriptorSetLayout(uint32_t maxNumFrames) { + VkSampler ccSampler = m_samplerYcbcrConversion.GetSampler(); - assert(ccSampler != VK_NULL_HANDLE); - VkDescriptorType type = (ccSampler != VK_NULL_HANDLE) ? VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + VkDescriptorType type = (ccSampler != VK_NULL_HANDLE) ? VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER : + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; const VkSampler* pImmutableSamplers = (ccSampler != VK_NULL_HANDLE) ? &ccSampler : nullptr; - const std::vector setLayoutBindings{ - // binding, descriptorType, descriptorCount, stageFlags, pImmutableSamplers; + std::vector setLayoutBindings; + + // Input bindings (either images or buffers) + if (m_inputIsBuffer) { + // Binding 0: Input buffer (read-only) for single buffer case + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 1: Input buffer (read-only) Y plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 2: Input buffer (read-only) Cb or CbCr plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 3: Input buffer (read-only) Cr plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + } else { // Binding 0: Input image (read-only) RGBA or RGBA YCbCr sampler sampled - VkDescriptorSetLayoutBinding{ 
0, type, 1, VK_SHADER_STAGE_COMPUTE_BIT, pImmutableSamplers}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 0, type, 1, VK_SHADER_STAGE_COMPUTE_BIT, pImmutableSamplers}); // Binding 1: Input image (read-only) Y plane of YCbCr Image - VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); // Binding 2: Input image (read-only) Cb or CbCr plane - VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); // Binding 3: Input image (read-only) Cr plane - VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + } + // Output bindings (either images or buffers) + if (m_outputIsBuffer) { + // Binding 4: Output buffer (write) for single buffer case + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 5: Output buffer (write) Y plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 6: Output buffer (write) CbCr plane of 2-plane or Cb of 3-plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + // Binding 7: Output buffer (write) Cr plane of 3-plane + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + } else { // 
Binding 4: Output image (write) RGBA or YCbCr single-plane image - VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); // Binding 5: Output image (write) Y plane of YCbCr Image - VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); // Binding 6: Output image (write) CbCr plane of 2-plane or Cb of 3-plane YCbCr Image - VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); // Binding 7: Output image (write) Cr plane of 3-pane YCbCr Image - VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, + setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); + } - // Binding 8: uniform buffer for input parameters. - VkDescriptorSetLayoutBinding{ 8, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}, - }; + // Binding 8: uniform buffer for input parameters. 
+ setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 8, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}); VkPushConstantRange pushConstantRange = {}; pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; // Stage the push constant is for @@ -175,20 +203,74 @@ static YcbcrBtStandard GetYcbcrPrimariesConstantsId(VkSamplerYcbcrModelConversio return YcbcrBtStandardUnknown; } +// Generate a unified push constants declaration for shaders +/** + * @brief Generates GLSL code for push constants declaration used in compute shaders + * + * This function creates a standard push constants block with fields for: + * - Source and destination image layers + * - Input and output dimensions + * - Buffer offsets and pitches for Y, Cb, and Cr planes + * + * @param shaderStr Output stringstream where the GLSL code will be written + */ +void GenPushConstantsDecl(std::stringstream& shaderStr) { + shaderStr << "layout(push_constant) uniform PushConstants {\n" + << " uint srcLayer; // src image layer to use\n" + << " uint dstLayer; // dst image layer to use\n" + << " uint inputWidth; // input image or buffer width\n" + << " uint inputHeight; // input image or buffer height\n" + << " uint outputWidth; // output image or buffer width\n" + << " uint outputHeight; // output image or buffer height\n" + << " uint inYOffset; // input buffer Y plane offset\n" + << " uint inCbOffset; // input buffer Cb plane offset\n" + << " uint inCrOffset; // input buffer Cr plane offset\n" + << " uint inYPitch; // input buffer Y plane pitch\n" + << " uint inCbPitch; // input buffer Cb plane pitch\n" + << " uint inCrPitch; // input buffer Cr plane pitch\n" + << " uint outYOffset; // output buffer Y plane offset\n" + << " uint outCbOffset; // output buffer Cb plane offset\n" + << " uint outCrOffset; // output buffer Cr plane offset\n" + << " uint outYPitch; // output buffer Y plane pitch\n" + << " uint outCbPitch; // output buffer Cb plane pitch\n" + << " uint outCrPitch; 
// output buffer Cr plane pitch\n" + << "} pushConstants;\n"; +} + +// Updated header function with unified push constants +/** + * @brief Generates the shader header with version declaration and push constants + * + * Creates the beginning of a GLSL compute shader with: + * - GLSL version declaration (#version 450) + * - Push constants structure + * - Local work group size (16x16) + * + * @param shaderStr Output stringstream where the GLSL code will be written + */ static void GenHeaderAndPushConst(std::stringstream& shaderStr) { - shaderStr << "#version 450\n" - "layout(push_constant) uniform PushConstants {\n" - " uint srcImageLayer; // Source image layer index\n" - " uint dstImageLayer; // Destination image layer index\n" - " ivec2 inputSize; // Original input image size (width, height)\n" - " ivec2 outputSize; // Output image size (width, height, with padding)\n" - "} pushConstants;\n" - "\n" - "layout (local_size_x = 16, local_size_y = 16) in;\n" - "\n"; + shaderStr << "#version 450\n"; + GenPushConstantsDecl(shaderStr); + shaderStr << "\n" + << "layout (local_size_x = 16, local_size_y = 16) in;\n" + << "\n"; } +/** + * @brief Generates GLSL code for image binding layout declarations + * + * Creates the binding declaration for an image resource in the shader. 
+ * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param imageName Base name for the image variable + * @param imageSubName Suffix name for the image variable (e.g., "Y", "CbCr") + * @param imageFormat Format string for the image (e.g., "rgba8") + * @param isInput Whether this is an input (readonly) or output (writeonly) image + * @param binding Binding point in the descriptor set + * @param set Descriptor set number + * @param imageArray Whether the image should be declared as image2DArray instead of image2D + */ static void GenImageIoBindingLayout(std::stringstream& shaderStr, const char *imageName, const char *imageSubName, @@ -206,22 +288,249 @@ static void GenImageIoBindingLayout(std::stringstream& shaderStr, } +/** + * @brief Generates GLSL code for handling global invocation position and bounds checking + * + * Creates code to: + * - Get the current pixel position from gl_GlobalInvocationID + * - Check if the position is within output image bounds + * - Return early if out of bounds to prevent invalid memory access + * + * @param shaderStr Output stringstream where the GLSL code will be written + */ static void GenHandleImagePosition(std::stringstream& shaderStr) { shaderStr << " ivec2 pos = ivec2(gl_GlobalInvocationID.xy);\n" " // Check for out-of-bounds writes\n" - " if ((pos.x >= pushConstants.outputSize.x) || (pos.y >= pushConstants.outputSize.y)) {\n" + " if ((pos.x >= pushConstants.outputWidth) || (pos.y >= pushConstants.outputHeight)) {\n" + " return;\n" + " }\n" + "\n"; +} + +/** + * @brief Generates GLSL code for buffer binding layout declarations + * + * Creates the binding declaration for a buffer resource in the shader. 
+ * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param bufferName Base name for the buffer variable + * @param bufferSubName Suffix name for the buffer variable (e.g., "Y", "CbCr") + * @param bufferDataType Data type of buffer elements (e.g., "uint8_t", "uint16_t") + * @param bufferType Vulkan descriptor type (Storage buffer, uniform texel buffer, etc.) + * @param isInput Whether this is an input (readonly) or output (writeonly) buffer + * @param binding Binding point in the descriptor set + * @param set Descriptor set number + */ +static void GenBufferIoBindingLayout(std::stringstream& shaderStr, + const char *bufferName, + const char *bufferSubName, + const char *bufferDataType, + VkDescriptorType bufferType, + bool isInput, + uint32_t binding, + uint32_t set) { + + const char* readonlyModifier = isInput ? " readonly" : ""; + const char* writeonlyModifier = isInput ? "" : " writeonly"; + + switch (bufferType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + shaderStr << "layout (set = " << set << ", binding = " << binding << ") uniform" + << " samplerBuffer " + << bufferName << bufferSubName + << ";\n"; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + shaderStr << "layout (set = " << set << ", binding = " << binding << ") uniform" + << readonlyModifier << writeonlyModifier + << " imageBuffer " + << bufferName << bufferSubName + << ";\n"; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + shaderStr << "layout (set = " << set << ", binding = " << binding << ") buffer" + << readonlyModifier << writeonlyModifier + << " " << bufferName << bufferSubName << "Buffer" + << " {\n" + << " " << bufferDataType << "[] data;\n" + << "} " << bufferName << bufferSubName << ";\n"; + break; + + default: + // Unsupported buffer type + break; + } +} + +/** + * @brief Generates GLSL code for determining if a position has chroma information + * + * Creates a condition that checks 
if the current pixel position contains + * chroma information based on the subsampling ratios. For example, in 4:2:0 + * subsampling, only pixels at even x and y coordinates have chroma samples. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param chromaHorzRatio Horizontal subsampling ratio (1 for 4:4:4, 2 for 4:2:2/4:2:0) + * @param chromaVertRatio Vertical subsampling ratio (1 for 4:4:4/4:2:2, 2 for 4:2:0) + * @param useCondition Whether to output as a full if-condition (true) or just the condition expression (false) + * @param pixelPosName Name of the pixel position variable in the shader (default: "srcPos") + * @param setProcessChromaBool Name of the boolean variable to set (default: "processChromaBool") + */ +static void GenHandleChromaPosition(std::stringstream& shaderStr, + uint32_t chromaHorzRatio, + uint32_t chromaVertRatio, + bool useCondition = true, + const char* pixelPosName = "srcPos", + const char* setProcessChromaBool = "processChromaBool") +{ + // Skip this for 4:4:4 since all pixels have chroma + if (chromaHorzRatio <= 1 && chromaVertRatio <= 1) { + if (useCondition) { + // For 4:4:4, no subsampling check needed - process all pixels + shaderStr << " bool " << setProcessChromaBool << " = true;\n"; + } else { + shaderStr << "true"; + } + return; + } + + // Build condition for chroma sampling + std::stringstream condition; + if (chromaHorzRatio > 1) + condition << "(" << pixelPosName << ".x % " << chromaHorzRatio << " == 0)"; + + if (chromaHorzRatio > 1 && chromaVertRatio > 1) + condition << " && "; + + if (chromaVertRatio > 1) + condition << "(" << pixelPosName << ".y % " << chromaVertRatio << " == 0)"; + + if (useCondition) { + shaderStr << " bool " << setProcessChromaBool << " = " << condition.str() << ";\n"; + } else { + shaderStr << condition.str(); + } +} + +/** + * @brief Generates GLSL code for calculating subsampled chroma positions + * + * Creates code to compute the chroma position from a pixel 
position + * based on the subsampling ratios. For example, in 4:2:0 subsampling, + * the chroma position is calculated by dividing both x and y by 2. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param chromaHorzRatio Horizontal subsampling ratio (1 for 4:4:4, 2 for 4:2:2/4:2:0) + * @param chromaVertRatio Vertical subsampling ratio (1 for 4:4:4/4:2:2, 2 for 4:2:0) + * @param srcPosName Name of the source position variable (default: "srcPos") + * @param dstPosName Name of the destination position variable (default: "chromaSrcPos") + * @param indent Number of spaces to indent the output code (default: 8) + * @param generateIfBlock Whether to generate an if-block or just assignment statements (default: false) + */ +static void GenCalculateChromaPosition(std::stringstream& shaderStr, + uint32_t chromaHorzRatio, + uint32_t chromaVertRatio, + const char* srcPosName = "srcPos", + const char* dstPosName = "chromaSrcPos", + int indent = 8, + bool generateIfBlock = false) +{ + std::string indentStr(indent, ' '); + + // For 4:4:4, no subsampling needed + if (chromaHorzRatio <= 1 && chromaVertRatio <= 1) { + shaderStr << indentStr << "// No subsampling for 4:4:4 format, use original position\n"; + if (generateIfBlock) { + shaderStr << indentStr << "// " << dstPosName << " already equals " << srcPosName << "\n"; + } else { + shaderStr << indentStr << dstPosName << " = " << srcPosName << ";\n"; + } + return; + } + + shaderStr << indentStr << "// Calculate subsampled positions based on format's subsampling\n"; + + if (generateIfBlock) { + // Generate an if-block for conditional calculation + shaderStr << indentStr << dstPosName << " = " << srcPosName << ";\n"; + shaderStr << indentStr << "if (processChroma) {\n"; + + if (chromaHorzRatio > 1) { + shaderStr << indentStr << " " << dstPosName << ".x = " << srcPosName << ".x / " << chromaHorzRatio << ";\n"; + } + + if (chromaVertRatio > 1) { + shaderStr << indentStr << " " << dstPosName << ".y = 
" << srcPosName << ".y / " << chromaVertRatio << ";\n"; + } + + shaderStr << indentStr << "}\n"; + } else { + // Generate direct assignment statements + shaderStr << indentStr << dstPosName << " = ivec2("; + + if (chromaHorzRatio > 1) + shaderStr << srcPosName << ".x / " << chromaHorzRatio; + else + shaderStr << srcPosName << ".x"; + + shaderStr << ", "; + + if (chromaVertRatio > 1) + shaderStr << srcPosName << ".y / " << chromaVertRatio; + else + shaderStr << srcPosName << ".y"; + + shaderStr << ");\n"; + } +} + +/** + * @brief Generates GLSL code for handling buffer position calculations with chroma subsampling + * + * Creates code to: + * - Get the current pixel position from gl_GlobalInvocationID + * - Check if the position is within output bounds + * - Calculate appropriate buffer indices based on subsampling ratios + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param chromaHorzRatio Horizontal subsampling ratio (default: 2 for 4:2:0/4:2:2) + * @param chromaVertRatio Vertical subsampling ratio (default: 2 for 4:2:0) + */ +static void GenHandleBufferPosition(std::stringstream& shaderStr, int chromaHorzRatio = 2, int chromaVertRatio = 2) +{ + shaderStr << + " ivec2 pos = ivec2(gl_GlobalInvocationID.xy);\n" + " // Check for out-of-bounds writes\n" + " if ((pos.x >= pushConstants.outputWidth) || (pos.y >= pushConstants.outputHeight)) {\n" " return;\n" " }\n" + " \n" + " // Calculate buffer indices based on position and strides\n" + " uint yIndex = pushConstants.inYOffset + pos.y * pushConstants.inYPitch + pos.x;\n" + " uint cbIndex = pushConstants.inCbOffset + (pos.y / " << chromaVertRatio << ") * pushConstants.inCbPitch + (pos.x / " << chromaHorzRatio << ");\n" + " uint crIndex = pushConstants.inCrOffset + (pos.y / " << chromaVertRatio << ") * pushConstants.inCrPitch + (pos.x / " << chromaHorzRatio << ");\n" "\n"; } +/** + * @brief Generates GLSL code for handling source position with optional replication + * + * Creates 
code to calculate source position, with optional boundary handling + * by replicating edge pixels when coordinates exceed input dimensions. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param enableReplicate Whether to enable edge replication (clamp to edge) + */ static void GenHandleSourcePositionWithReplicate(std::stringstream& shaderStr, bool enableReplicate) { if (enableReplicate) { shaderStr << - " ivec2 srcPos = min(pos, pushConstants.inputSize );\n" + " ivec2 srcPos = min(pos, ivec2(pushConstants.inputWidth, pushConstants.inputHeight));\n" "\n"; } else { shaderStr << @@ -230,15 +539,622 @@ static void GenHandleSourcePositionWithReplicate(std::stringstream& shaderStr, b } } -void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr, - VkImageAspectFlags& imageAspects, - const char *imageName, - VkFormat imageFormat, - bool isInput, - uint32_t startBinding, - uint32_t set, - bool imageArray) +/** + * @brief Generates GLSL function for fetching Y samples from a buffer + * + * Creates a helper function that reads Y samples from a buffer and + * normalizes values to 0.0-1.0 range, handling different bit depths. 
+ * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param isHighBitDepth Whether the Y data is high bit depth (>8 bits) + * @param bitDepth The bit depth of Y samples (8, 10, 12, or 16) + */ +static void GenFetchYFromBufferFunc(std::stringstream& shaderStr, + bool isHighBitDepth, uint32_t bitDepth) { + shaderStr << "// Function to fetch Y component from buffer\n" + << "float fetchYFromBuffer(uint index) {\n"; + + if (isHighBitDepth) { + shaderStr << " uint16_t rawValue = inputBufferY.data[index];\n" + << " return extractHighBitDepth(rawValue);\n"; + } else { + shaderStr << " uint8_t byteValue = inputBufferY.data[index];\n" + << " return float(byteValue) / 255.0;\n"; + } + + shaderStr << "}\n\n"; +} + +/** + * @brief Generates GLSL functions for fetching Cb and Cr samples from buffers + * + * Creates helper functions to read Cb and Cr chroma samples from buffers and + * normalize values to 0.0-1.0 range, handling different bit depths. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param isHighBitDepth Whether the chroma data is high bit depth (>8 bits) + * @param bitDepth The bit depth of chroma samples (8, 10, 12, or 16) + */ +static void GenFetchCbCrFromBufferFunc(std::stringstream& shaderStr, + bool isHighBitDepth, uint32_t bitDepth) { + // Cb fetch function + shaderStr << "// Function to fetch Cb component from buffer\n" + << "float fetchCbFromBuffer(uint index) {\n"; + + if (isHighBitDepth) { + shaderStr << " uint16_t rawValue = inputBufferCb.data[index];\n" + << " return extractHighBitDepth(rawValue);\n"; + } else { + shaderStr << " uint8_t byteValue = inputBufferCb.data[index];\n" + << " return float(byteValue) / 255.0;\n"; + } + + shaderStr << "}\n\n"; + + // Cr fetch function + shaderStr << "// Function to fetch Cr component from buffer\n" + << "float fetchCrFromBuffer(uint index) {\n"; + + if (isHighBitDepth) { + shaderStr << " uint16_t rawValue = inputBufferCr.data[index];\n" + << " 
return extractHighBitDepth(rawValue);\n"; + } else { + shaderStr << " uint8_t byteValue = inputBufferCr.data[index];\n" + << " return float(byteValue) / 255.0;\n"; + } + + shaderStr << "}\n\n"; +} + +/** + * @brief Generates GLSL function for extracting and normalizing high bit-depth values + * + * Creates a helper function to extract and normalize values from high bit-depth + * formats (10, 12, or 16 bits), handling MSB or LSB aligned data. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param isMSB Whether the high bits are MSB-aligned (true) or LSB-aligned (false) + * @param bitDepth The bit depth of the samples (10, 12, or 16) + */ +static void GenExtractHighBitDepthFunc(std::stringstream& shaderStr, + bool isMSB, uint32_t bitDepth) +{ + shaderStr << "// Helper function to extract and normalize high bit-depth values\n"; + + if (isMSB) { + // For MSB-aligned data + shaderStr << "float extractHighBitDepth(uint value) {\n" + << " // For MSB-aligned " << bitDepth << "-bit data, shift right to extract the bits\n" + << " uint extractedValue = value >> (16u - " << bitDepth << "u);\n" + << " // Normalize to 0.0-1.0 range\n" + << " return float(extractedValue) / " << ((1 << bitDepth) - 1) << ".0;\n" + << "}\n\n"; + } else { + // For LSB-aligned data + shaderStr << "float extractHighBitDepth(uint value) {\n" + << " // For LSB-aligned " << bitDepth << "-bit data, mask to extract the bits\n" + << " uint extractedValue = value & " << ((1 << bitDepth) - 1) << "u;\n" + << " // Normalize to 0.0-1.0 range\n" + << " return float(extractedValue) / " << ((1 << bitDepth) - 1) << ".0;\n" + << "}\n\n"; + } +} + +/** + * @brief Generates GLSL code for applying MSB-to-LSB bit shifting for high bit-depth content + * + * Creates code to convert MSB-aligned high bit-depth content to normalized values: + * - For images (floating point): Divide by the appropriate factor + * - For buffers (integer): Perform right bit shift operations + * + * @param 
shaderStr Output stringstream where the GLSL code will be written + * @param isInputBuffer Whether the input is a buffer (true) or image (false) + * @param inputBitDepth The bit depth of the input data (8, 10, 12, or 16) + * @param imageAspects Image aspect flags indicating which planes are being processed + */ +static void GenApplyMsbToLsbShift(std::stringstream& shaderStr, + bool isInputBuffer, + uint32_t inputBitDepth, + VkImageAspectFlags imageAspects) +{ + // Only apply for high bit-depth formats (10/12-bit) + if ((inputBitDepth != 10) && (inputBitDepth != 12)) { + return; + } + + // Calculate shift amount based on bit depth + uint32_t shiftAmount = 16 - inputBitDepth; + float shiftFactor = static_cast(1 << shiftAmount); + + shaderStr << "\n // MSB-to-LSB shift for high bit-depth " + << (isInputBuffer ? "buffer" : "image") << " data\n"; + + if (isInputBuffer) { + // For buffers, we use actual bit shifting operations on integer values + shaderStr << " // For high bit-depth data in buffers, we need to shift right by " + << shiftAmount << " bits to convert from MSB-aligned to actual values\n" + << " // This is a right shift operation for integer values\n"; + + // Build a condition mask based on which components are being read + std::string maskCondition = ""; + bool needsOr = false; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + maskCondition += "YCbCrRawOut.x > 0.0"; + needsOr = true; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + if (needsOr) maskCondition += " || "; + maskCondition += "YCbCrRawOut.y > 0.0"; + needsOr = true; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + if (needsOr) maskCondition += " || "; + maskCondition += "YCbCrRawOut.z > 0.0"; + } + + // Only apply shift if there are values to shift + if (!maskCondition.empty()) { + shaderStr << " if (" << maskCondition << ") {\n" + << " // Convert from uint values to normalized float (for buffer inputs)\n"; + + if (inputBitDepth == 10) { + shaderStr << " // For 10-bit: 
Convert 10-bit values [0-1023] to normalized [0-1]\n" + << " const float normFactor = 1.0 / 1023.0;\n"; + } else { // 12-bit + shaderStr << " // For 12-bit: Convert 12-bit values [0-4095] to normalized [0-1]\n" + << " const float normFactor = 1.0 / 4095.0;\n"; + } + + // Apply right shift with bit mask to extract the actual bit values + // For 10-bit: (value >> 6) & 0x3FF = value / 64 (rounded down) + // For 12-bit: (value >> 4) & 0xFFF = value / 16 (rounded down) + shaderStr << " // Apply right shift to convert from MSB-aligned to actual bit values\n"; + + // Apply component-specific shifting based on which aspects are being read + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + shaderStr << " YCbCrRawOut.x = floor(YCbCrRawOut.x / " << shiftFactor + << ".0) * normFactor;\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " YCbCrRawOut.y = floor(YCbCrRawOut.y / " << shiftFactor + << ".0) * normFactor;\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " YCbCrRawOut.z = floor(YCbCrRawOut.z / " << shiftFactor + << ".0) * normFactor;\n"; + } + + shaderStr << " }\n"; + } + } else { + // For images, we're already working with normalized values, so we divide by shiftFactor + shaderStr << " // For high bit-depth data in images that are MSB-aligned,\n" + << " // we need to divide by " << shiftFactor << " to get the proper normalized values\n"; + + // Build a shift mask based on which components are being read + std::string shiftMask = "vec3("; + shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) ? "1.0, " : "0.0, "; + shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? "1.0, " : "0.0, "; + shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? 
"1.0" : "0.0"; + shiftMask += ")"; + + // Calculate reciprocal of shift factor (for multiplication instead of division) + float shiftFactorRecip = 1.0f / shiftFactor; + + // Only apply shift to the components that were actually read + shaderStr << " // Apply multiplication by reciprocal instead of division (more efficient)\n" + << " const float shiftFactorRecip = " << std::fixed << std::setprecision(8) << shiftFactorRecip << "f;\n" + << " YCbCrRawOut = YCbCrRawOut * shiftFactorRecip * " << shiftMask << " + \n" + << " YCbCrRawOut * (vec3(1.0) - " << shiftMask << ");\n"; + } +} + +/** + * @brief Generates GLSL function for reading YCbCr data from either buffer or image sources + * + * Creates a function that reads YCbCr data from the appropriate source (buffer or image) + * based on the input format configuration. Handles different bit depths and plane layouts. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param isInputBuffer Whether the input is a buffer (true) or image (false) + * @param inputBitDepth The bit depth of the input data (8, 10, 12, or 16) + * @param isInputTwoPlane Whether the input has two planes (e.g., NV12) or three planes + */ +static void GenReadYCbCrBuffer(std::stringstream& shaderStr, + bool isInputBuffer, + uint32_t inputBitDepth, + bool isInputTwoPlane, + bool enableMsbToLsbShift = false, + VkImageAspectFlags imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT, + const char* useProcessChromaBool = "processChroma") +{ + // Generate function to read from either buffer or image + shaderStr << + "// Function to read YCbCr data from input source (buffer or image)\n" + "vec3 readYCbCrFromSource(ivec2 pos, ivec2 chromaPos, uint srcLayer, bool processChroma) {\n" + " // Initialize to YCbCr black values (for limited range)\n"; + + // Set appropriate black values based on bit depth + if (inputBitDepth == 8) { + shaderStr << " vec3 YCbCrRawOut = 
vec3(16.0/255.0, 128.0/255.0, 128.0/255.0);\n\n"; + } else if (inputBitDepth == 10) { + shaderStr << " vec3 YCbCrRawOut = vec3(64.0/1023.0, 512.0/1023.0, 512.0/1023.0);\n\n"; + } else if (inputBitDepth == 12) { + shaderStr << " vec3 YCbCrRawOut = vec3(256.0/4095.0, 2048.0/4095.0, 2048.0/4095.0);\n\n"; + } else if (inputBitDepth == 16) { + shaderStr << " vec3 YCbCrRawOut = vec3(4096.0/65535.0, 32768.0/65535.0, 32768.0/65535.0);\n\n"; + } else { + // Default fallback + shaderStr << " vec3 YCbCrRawOut = vec3(16.0/255.0, 128.0/255.0, 128.0/255.0);\n\n"; + } + + if (isInputBuffer) { + // Reading from buffer + shaderStr << " // Reading from buffer source\n"; + + // Read Y component if PLANE_0_BIT is set + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + shaderStr << + " // Calculate buffer index for Y plane\n" + " uint yIndex = pushConstants.inYOffset + pos.y * pushConstants.inYPitch + pos.x;\n" + " YCbCrRawOut.x = fetchYFromBuffer(yIndex);\n\n"; + } + + // Read Cb/Cr components based on plane format and aspect flags + if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) { + // Add conditional check for chroma processing + shaderStr << " // Process chroma data conditionally\n" + << " if (processChroma) {\n"; + + if (isInputTwoPlane) { + // Two-plane input buffer format with interleaved CbCr + shaderStr << " // Read interleaved CbCr data from 2-plane input buffer\n" + << " uint cbcrIndex = pushConstants.inCbOffset + chromaPos.y * pushConstants.inCbPitch + chromaPos.x * 2;\n"; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " YCbCrRawOut.y = fetchCbFromBuffer(cbcrIndex);\n" + << " YCbCrRawOut.z = fetchCrFromBuffer(cbcrIndex + 1);\n"; + } + } else { + // Three-plane input buffer format with separate Cb and Cr planes + shaderStr << " // Read separate Cb and Cr from 3-plane input buffer\n"; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " uint cbIndex = pushConstants.inCbOffset + chromaPos.y * 
pushConstants.inCbPitch + chromaPos.x;\n" + << " YCbCrRawOut.y = fetchCbFromBuffer(cbIndex);\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " uint crIndex = pushConstants.inCrOffset + chromaPos.y * pushConstants.inCrPitch + chromaPos.x;\n" + << " YCbCrRawOut.z = fetchCrFromBuffer(crIndex);\n"; + } + } + + // Close the conditional block + shaderStr << " }\n"; + } + } else { + // Reading from image + shaderStr << " // Reading from image source\n"; + + // Read Y component if PLANE_0_BIT is set + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + shaderStr << " // Read Y value from Y plane\n" + << " YCbCrRawOut.x = imageLoad(inputImageY, ivec3(pos, srcLayer)).r;\n\n"; + } + + // Read Cb/Cr components based on plane format and aspect flags + if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) { + // Add conditional check for chroma processing + shaderStr << " // Process chroma data conditionally\n" + << " if (processChroma) {\n"; + + if (isInputTwoPlane) { + // Two-plane input image format with interleaved CbCr + shaderStr << " // Read interleaved CbCr data from 2-plane input image\n"; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + // For two-plane formats (NV12, etc.), both Cb and Cr are in the second plane + shaderStr << " YCbCrRawOut.yz = imageLoad(inputImageCbCr, ivec3(chromaPos, srcLayer)).rg;\n"; + } + } else { + // Three-plane input image format with separate Cb and Cr planes + shaderStr << " // Read separate Cb and Cr from 3-plane input image\n"; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " YCbCrRawOut.y = imageLoad(inputImageCb, ivec3(chromaPos, srcLayer)).r; // Cb\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " YCbCrRawOut.z = imageLoad(inputImageCr, ivec3(chromaPos, srcLayer)).r; // Cr\n"; + } + } + + // Close the conditional block + shaderStr << " }\n"; + } + } + + // Apply MSB-to-LSB shift if enabled + if (enableMsbToLsbShift) 
{
+        GenApplyMsbToLsbShift(shaderStr, isInputBuffer, inputBitDepth, imageAspects);
+    }
+
+    // Return the raw YCbCr values
+    shaderStr <<
+        "\n    return YCbCrRawOut;\n"
+        "}\n\n";
+}
+
+/**
+ * @brief Generates GLSL function for applying LSB-to-MSB bit shifting for high bit-depth content
+ *
+ * Creates code to convert normalized values to MSB-aligned high bit-depth content by
+ * applying the appropriate bit shift. This function only handles the shift calculation,
+ * not the actual I/O operations.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isOutputBuffer Whether the output is a buffer (true) or image (false)
+ * @param outputBitDepth The bit depth of the output data (8, 10, 12, or 16)
+ */
+static void GenApplyLsbToMsbShift(std::stringstream& shaderStr,
+                                  bool isOutputBuffer,
+                                  uint32_t outputBitDepth)
+{
+    // Only apply for high bit-depth formats (10/12-bit)
+    if ((outputBitDepth != 10) && (outputBitDepth != 12)) {
+        // For 8-bit or 16-bit, no shift is needed - just use the input values directly
+        shaderStr << "    // No bit-depth shift needed for " << outputBitDepth << "-bit format\n\n";
+        return;
+    }
+
+    // Calculate shift amount based on bit depth
+    uint32_t shiftAmount = 16 - outputBitDepth;
+    float shiftFactor = static_cast<float>(1 << shiftAmount);
+
+    shaderStr << "    // Apply LSB-to-MSB shift for high bit-depth "
+              << (isOutputBuffer ? 
"buffer" : "image") << " data\n"; + + if (isOutputBuffer) { + // For buffers, we'll return unshifted values because the packing functions + // handle the bit shifting during the actual write operation + shaderStr << " // For buffer output, shift will be applied during packing\n\n"; + } else { + // For images, we need to multiply by shift factor to align bits properly + // Calculate multiplication factor + shaderStr << " // For image output with " << outputBitDepth << "-bit, multiply by " << shiftFactor + << " to shift into the MSB\n" + << " const float shiftFactorMultiplier = " << shiftFactor << ";\n" + << " YCbCrRawIn = YCbCrRawIn * shiftFactorMultiplier;\n\n"; + } +} + +/** + * @brief Generates GLSL function for writing YCbCr data to either buffer or image destinations + * + * Creates a function that writes YCbCr data to the appropriate destination (buffer or image) + * based on the output format configuration. Handles different bit depths and plane layouts. + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param isOutputBuffer Whether the output is a buffer (true) or image (false) + * @param outputBitDepth The bit depth of the output data (8, 10, 12, or 16) + * @param isOutputTwoPlane Whether the output format has two planes (e.g., NV12) or three planes + */ +static void GenWriteYCbCrBuffer(std::stringstream& shaderStr, + bool isOutputBuffer, + uint32_t outputBitDepth, + bool isOutputTwoPlane, + bool enableLsbToMsbShift = false, + VkImageAspectFlags imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT, + const char* useProcessChromaBool = "processChroma") +{ + // Generate function to write to either buffer or image + shaderStr << + "// Function to write YCbCr data to output destination (buffer or image)\n" + "void writeYCbCrToDestination(vec3 YCbCrRawIn, ivec2 pos, ivec2 chromaPos, uint dstLayer, bool processChroma) {\n"; + + // Apply LSB-to-MSB shift if enabled - just 
transforms the values, doesn't do I/O + if (enableLsbToMsbShift) { + GenApplyLsbToMsbShift(shaderStr, isOutputBuffer, outputBitDepth); + } + + if (isOutputBuffer) { + // Writing to buffer + shaderStr << + " // Writing to buffer destination\n"; + + // Write Y component if PLANE_0_BIT is set + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + shaderStr << + " // Calculate buffer index for Y plane\n" + " uint outYIndex = pushConstants.outYOffset + pos.y * pushConstants.outYPitch + pos.x;\n\n"; + + // Handle normal Y component based on bit depth + if (outputBitDepth > 8) { + // For high bit-depth formats + switch (outputBitDepth) { + case 10: + shaderStr << " outputBufferY.data[outYIndex] = pack10BitTo16Bit(YCbCrRawIn.x);\n\n"; + break; + case 12: + shaderStr << " outputBufferY.data[outYIndex] = pack12BitTo16Bit(YCbCrRawIn.x);\n\n"; + break; + case 16: + default: + // For 16-bit, direct value + shaderStr << " outputBufferY.data[outYIndex] = uint16_t(clamp(YCbCrRawIn.x, 0.0, 65535.0));\n\n"; + break; + } + } else { + // For 8-bit formats + shaderStr << " outputBufferY.data[outYIndex] = uint8_t(clamp(YCbCrRawIn.x, 0.0, 255.0));\n\n"; + } + } + + // Write Cb/Cr components based on plane format and aspect flags + if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) { + shaderStr << " // Process chroma data conditionally\n" + << " if (processChroma) {\n"; + + if (isOutputTwoPlane) { + // Two-plane output buffer format with interleaved CbCr + shaderStr << " // Write interleaved CbCr to 2-plane output buffer\n" + << " uint outCbCrIndex = pushConstants.outCbOffset + chromaPos.y * pushConstants.outCbPitch + chromaPos.x * 2;\n"; + + // Normal CbCr processing + if (outputBitDepth > 8) { + // For high bit-depth formats with interleaved data + switch (outputBitDepth) { + case 10: + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCbCr.data[outCbCrIndex] = pack10BitTo16Bit(YCbCrRawIn.y);\n" + << " 
outputBufferCbCr.data[outCbCrIndex + 1] = pack10BitTo16Bit(YCbCrRawIn.z);\n"; + } + break; + case 12: + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCbCr.data[outCbCrIndex] = pack12BitTo16Bit(YCbCrRawIn.y);\n" + << " outputBufferCbCr.data[outCbCrIndex + 1] = pack12BitTo16Bit(YCbCrRawIn.z);\n"; + } + break; + case 16: + default: + // For 16-bit, direct values + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCbCr.data[outCbCrIndex] = uint16_t(clamp(YCbCrRawIn.y, 0.0, 65535.0));\n" + << " outputBufferCbCr.data[outCbCrIndex + 1] = uint16_t(clamp(YCbCrRawIn.z, 0.0, 65535.0));\n"; + } + break; + } + } else { + // For 8-bit formats + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCbCr.data[outCbCrIndex] = uint8_t(clamp(YCbCrRawIn.y, 0.0, 255.0));\n" + << " outputBufferCbCr.data[outCbCrIndex + 1] = uint8_t(clamp(YCbCrRawIn.z, 0.0, 255.0));\n"; + } + } + } else { + // Three-plane output buffer format with separate Cb and Cr planes + shaderStr << " // Write separate Cb and Cr to 3-plane output buffer\n"; + + // Calculate indices for separate planes + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " uint outCbIndex = pushConstants.outCbOffset + chromaPos.y * pushConstants.outCbPitch + chromaPos.x;\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " uint outCrIndex = pushConstants.outCrOffset + chromaPos.y * pushConstants.outCrPitch + chromaPos.x;\n"; + } + + if (outputBitDepth > 8) { + // For high bit-depth formats + switch (outputBitDepth) { + case 10: + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCb.data[outCbIndex] = pack10BitTo16Bit(YCbCrRawIn.y);\n"; + } + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " outputBufferCr.data[outCrIndex] = pack10BitTo16Bit(YCbCrRawIn.z);\n"; + } + break; + case 12: + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " 
outputBufferCb.data[outCbIndex] = pack12BitTo16Bit(YCbCrRawIn.y);\n"; + } + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " outputBufferCr.data[outCrIndex] = pack12BitTo16Bit(YCbCrRawIn.z);\n"; + } + break; + case 16: + default: + // For 16-bit, direct values + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCb.data[outCbIndex] = uint16_t(clamp(YCbCrRawIn.y, 0.0, 65535.0));\n"; + } + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " outputBufferCr.data[outCrIndex] = uint16_t(clamp(YCbCrRawIn.z, 0.0, 65535.0));\n"; + } + break; + } + } else { + // For 8-bit formats + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " outputBufferCb.data[outCbIndex] = uint8_t(clamp(YCbCrRawIn.y, 0.0, 255.0));\n"; + } + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " outputBufferCr.data[outCrIndex] = uint8_t(clamp(YCbCrRawIn.z, 0.0, 255.0));\n"; + } + } + } + + shaderStr << " }\n"; // Close conditional chroma processing + } + } else { + // Writing to image + shaderStr << " // Writing to image destination\n"; + + // Write Y component if PLANE_0_BIT is set + if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) { + shaderStr << " // Write Y component to Y plane\n" + << " imageStore(outputImageY, ivec3(pos, dstLayer), vec4(YCbCrRawIn.x, 0, 0, 1));\n\n"; + } + + // Write Cb/Cr components if their aspect flags are set + if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) { + // Add conditional check for chroma processing + shaderStr << " // Process chroma data conditionally\n" + << " if (processChroma) {\n"; + + if (isOutputTwoPlane) { + // Two-plane output image format with interleaved CbCr + if ((imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) != 0) { + // Both Cb and Cr are needed + shaderStr << " // Write interleaved CbCr to 2-plane output image\n" + << " imageStore(outputImageCbCr, ivec3(chromaPos, dstLayer), " + << "vec4(YCbCrRawIn.y, YCbCrRawIn.z, 0, 
1));\n"; + } + } else { + // Three-plane output image format with separate Cb and Cr planes + shaderStr << " // Write separate Cb and Cr to 3-plane output image\n"; + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " imageStore(outputImageCb, ivec3(chromaPos, dstLayer), vec4(YCbCrRawIn.y, 0, 0, 1));\n"; + } + + if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " imageStore(outputImageCr, ivec3(chromaPos, dstLayer), vec4(YCbCrRawIn.z, 0, 0, 1));\n"; + } + } + + // Close the conditional block + shaderStr << " }\n"; + } + } + + // End the function + shaderStr << "}\n\n"; +} + +uint32_t VulkanFilterYuvCompute::ShaderGenerateImagePlaneDescriptors(std::stringstream& shaderStr, + VkImageAspectFlags& imageAspects, + const char *imageName, + VkFormat imageFormat, + bool isInput, + uint32_t startBinding, + uint32_t set, + bool imageArray) +{ + shaderStr << " // The " << (isInput ? "input" : "output") << " image binding\n"; // Image binding goes in this pattern: // offset 0: RGBA image // offset 1: multi-planar image plane Y @@ -267,7 +1183,8 @@ void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& s } else if (inputMpInfo->planesLayout.numberOfExtraPlanes == 2) { - imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT; + imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT; GenImageIoBindingLayout(shaderStr, imageName, "Cb", vkFormatLookUp(inputMpInfo->vkPlaneFormat[1])->name, @@ -290,10 +1207,631 @@ void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& s GenImageIoBindingLayout(shaderStr, imageName, "RGB", vkFormatLookUp(imageFormat)->name, isInput, - startBinding, + startBinding++, set, imageArray); } + + return startBinding; +} + +uint32_t VulkanFilterYuvCompute::ShaderGenerateBufferPlaneDescriptors(std::stringstream& shaderStr, + VkImageAspectFlags& imageAspects, + const char *bufferName, + VkFormat bufferFormat, + 
bool isInput, + uint32_t startBinding, + uint32_t set, + VkDescriptorType bufferType) +{ + // Buffer binding follows the same pattern as image binding: + // offset 0: Single RGBA buffer with all data + // offset 1: Y plane buffer + // offset 2: 2-planar CbCr buffer or 3-planar Cb buffer + // offset 3: 3-planar Cr buffer + const VkMpFormatInfo* inputMpInfo = YcbcrVkFormatInfo(bufferFormat); + + // Determine element size based on format + const char* elementType = "uint8_t"; // Default to 8-bit + + shaderStr << " // The " << (isInput ? "input" : "output") << " buffer binding\n"; + // Check format for higher bit depths (16-bit formats) + const VkFormatDesc* formatInfo = vkFormatLookUp(bufferFormat); + if (formatInfo && formatInfo->name) { + if (strstr(formatInfo->name, "16") != nullptr || + strstr(formatInfo->name, "R16") != nullptr || + strstr(formatInfo->name, "10") != nullptr || + strstr(formatInfo->name, "12") != nullptr) { + elementType = "uint16_t"; // Use 16-bit for 10/12/16-bit formats + } + } + + if (inputMpInfo) { + // For multi-planar formats, define separate buffers for each plane + + // Y plane buffer (plane 0) + GenBufferIoBindingLayout(shaderStr, bufferName, "Y", + elementType, + bufferType, + isInput, + ++startBinding, + set); + + if (inputMpInfo->planesLayout.numberOfExtraPlanes == 1) { + // 2-plane format (NV12, NV21, etc.) + imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT; + + GenBufferIoBindingLayout(shaderStr, bufferName, "CbCr", + elementType, + bufferType, + isInput, + ++startBinding, + set); + + } else if (inputMpInfo->planesLayout.numberOfExtraPlanes == 2) { + // 3-plane format (YUV 4:2:0, 4:2:2, 4:4:4, etc.) 
+ imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT; + + GenBufferIoBindingLayout(shaderStr, bufferName, "Cb", + elementType, + bufferType, + isInput, + ++startBinding, + set); + + GenBufferIoBindingLayout(shaderStr, bufferName, "Cr", + elementType, + bufferType, + isInput, + ++startBinding, + set); + } + } else { + // For single-plane formats (like RGBA) + imageAspects = VK_IMAGE_ASPECT_COLOR_BIT; + + GenBufferIoBindingLayout(shaderStr, bufferName, "RGB", + elementType, + bufferType, + isInput, + startBinding++, + set); + } + + return startBinding; +} + + +uint32_t VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr, + bool isInput, + uint32_t startBinding, + uint32_t set, + bool imageArray, + VkDescriptorType bufferType) +{ + + if ((isInput && m_inputIsBuffer) || (!isInput && m_outputIsBuffer)) { + + return ShaderGenerateBufferPlaneDescriptors(shaderStr, + isInput ? m_inputImageAspects : m_outputImageAspects, + isInput ? "inputBuffer" : "outputBuffer", + isInput ? m_inputFormat : m_outputFormat, + isInput, // isInput + startBinding, // startBinding + set, // set + bufferType); + } else { + + return ShaderGenerateImagePlaneDescriptors(shaderStr, + isInput ? m_inputImageAspects : m_outputImageAspects, + isInput ? "inputImage" : "outputImage", + isInput ? m_inputFormat : m_outputFormat, + isInput, // isInput + startBinding, // startBinding + set, // set + imageArray // imageArray + ); + } +} + +/** + * @brief Generates GLSL functions for YCbCr normalization with different bit depths + * + * Creates helper functions to normalize YCbCr values, handling different bit depths, + * and applying proper range adjustments (limited/full range). + * + * Process steps: + * 1. Calculate normalization parameters based on bit depth and range + * 2. Generate Y normalization function (scaling + offset) + * 3. Generate CbCr shifting functions (centering around zero) + * 4. 
Generate CbCr normalization functions (scaling + offset) + * 5. Generate bit-depth specific helpers for 10/12-bit formats + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param bitDepth The bit depth of the YCbCr data (8, 10, 12, or 16) + * @param isLimitedRange Whether values are limited range (true) or full range (false) + * @param hasChroma Whether to include chroma normalization functions + */ +static void GenYCbCrNormalizationFuncs(std::stringstream& shaderStr, + uint32_t bitDepth = 8, + bool isLimitedRange = true, + bool hasChroma = true) +{ + // STEP 1: Calculate normalization parameters based on bit depth and range + // =========================================================================== + + // Use double precision for calculations to maintain precision + double maxValue = (1ULL << bitDepth) - 1.0; // Max value for the given bit depth + + // Limited range values for different bit depths + double yBlack, yWhite, cZero, cScale; + + if (isLimitedRange) { + // Step 1.1: Calculate limited range (aka TV/Video range) values + // Use standard-compliant values for different bit depths + switch (bitDepth) { + case 10: + // 10-bit limited range: Y[64,940], C[64,960] + yBlack = 64.0; + yWhite = 940.0; + cZero = 64.0; + cScale = 896.0; // 960 - 64 + break; + case 12: + // 12-bit limited range: Y[256,3760], C[256,3840] + yBlack = 256.0; + yWhite = 3760.0; + cZero = 256.0; + cScale = 3584.0; // 3840 - 256 + break; + case 16: + // 16-bit limited range: scale 8-bit values by 2^8 + yBlack = 16.0 * 256.0; + yWhite = 235.0 * 256.0; + cZero = 16.0 * 256.0; + cScale = 224.0 * 256.0; + break; + case 8: + default: + // 8-bit limited range: Y[16,235], C[16,240] + yBlack = 16.0; + yWhite = 235.0; + cZero = 16.0; + cScale = 224.0; + break; + } + } else { + // Step 1.2: Calculate full range values (same for all bit depths, just scaled) + yBlack = 0.0; + yWhite = maxValue; + cZero = 0.0; + cScale = maxValue; + } + + // Step 1.3: Calculate 
normalization factors with double precision + double yRange = yWhite - yBlack; + double yFactor = 1.0 / yRange; + double yOffset = -yBlack * yFactor; + double cFactor = 1.0 / cScale; + + // Format values with high precision for GLSL + std::stringstream ss; + ss.precision(16); // Use high precision for constants + + // STEP 2: Generate Y normalization function + // =========================================================================== + shaderStr << "\n" + << "// Specify high precision for all floating point calculations\n" + << "precision highp float;\n" + << "precision highp int;\n" + << "\n" + << "// STEP 1: Normalize Y component for " << bitDepth << "-bit " + << (isLimitedRange ? "limited range" : "full range") << " content\n" + << "highp float normalizeY(highp float Y) {\n"; + + if (isLimitedRange) { + // Step 2.1: Limited range needs black level adjustment and scaling + // Format with high precision + ss.str(""); + ss << std::fixed << yFactor; + std::string yFactorStr = ss.str(); + + ss.str(""); + ss << std::fixed << yOffset; + std::string yOffsetStr = ss.str(); + + shaderStr << " // Step 1.1: Map from [" << yBlack << ", " << yWhite << "] to [0.0, 1.0]\n" + << " // Formula: normalizedY = (Y - yBlack) / yRange = Y * yFactor + yOffset\n" + << " return Y * " << yFactorStr << " + " << yOffsetStr << ";\n"; + } else { + // Step 2.2: Full range just needs scaling + shaderStr << " // Step 1.1: Map from [0, " << maxValue << "] to [0.0, 1.0]\n" + << " // Formula: normalizedY = Y / maxValue\n" + << " return Y / " << maxValue << ";\n"; + } + shaderStr << "}\n\n"; + + if (hasChroma) { + // STEP 3: Generate CbCr shifting functions + // =========================================================================== + + // Step 3.1: Generate CbCr shifting function for vec2 (common for 2-plane formats) + shaderStr << "// STEP 2: Shift CbCr components from centered range to [-0.5, 0.5] range\n" + << "highp vec2 shiftCbCr(highp vec2 CbCr) {\n" + << " // Step 2.1: Shift from 
[0.0, 1.0] to [-0.5, 0.5]\n" + << " return CbCr - 0.5;\n" + << "}\n\n"; + + // Step 3.2: Generate CbCr shifting function for vec3 (for full YCbCr triplet) + shaderStr << "// Step 2 (alternative): Shift YCbCr components, leaving Y alone but centering CbCr\n" + << "highp vec3 shiftCbCr(highp vec3 ycbcr) {\n" + << " // Step 2.1: Shift only Cb and Cr from [0.0, 1.0] to [-0.5, 0.5]\n" + << " const highp vec3 shift = vec3(0.0, -0.5, -0.5);\n" + << " return ycbcr + shift;\n" + << "}\n\n"; + + // STEP 4: Generate CbCr normalization function + // =========================================================================== + shaderStr << "// STEP 3: Normalize CbCr components for " << bitDepth << "-bit " + << (isLimitedRange ? "limited range" : "full range") << " content\n" + << "highp vec2 normalizeCbCr(highp vec2 CbCr) {\n"; + + if (isLimitedRange) { + // Step 4.1: Limited range needs zero level adjustment and scaling + // Format with high precision + ss.str(""); + ss << std::fixed << cZero; + std::string cZeroStr = ss.str(); + + ss.str(""); + ss << std::fixed << cFactor; + std::string cFactorStr = ss.str(); + + shaderStr << " // Step 3.1: Map from [" << cZero << ", " << (cZero + cScale) << "] to [0.0, 1.0]\n" + << " // Formula: normalizedCbCr = (CbCr - cZero) / cScale\n" + << " return (CbCr - " << cZeroStr << ") * " << cFactorStr << ";\n"; + } else { + // Step 4.2: Full range just needs scaling + shaderStr << " // Step 3.1: Map from [0, " << maxValue << "] to [0.0, 1.0]\n" + << " // Formula: normalizedCbCr = CbCr / maxValue\n" + << " return CbCr / " << maxValue << ";\n"; + } + shaderStr << "}\n\n"; + } + + // STEP 5: Generate bit-depth specific helper functions for 10/12-bit formats + // =========================================================================== + if (bitDepth == 10) { + shaderStr << "// STEP 4: Special 10-bit format handling functions\n" + << "// 10-bit packing formats often store values in uint16 or uint32 with specific bit layouts\n" + << "\n" + << "// 
Extract 10-bit value from 16-bit storage (common for P010, P210, etc.)\n" + << "highp float extract10BitFrom16Bit(highp uint value) {\n" + << " // Most 10-bit formats store the value in the most significant 10 bits\n" + << " highp uint raw10bit = value >> 6; // Shift right to remove 6 padding bits\n" + << " return float(raw10bit);\n" + << "}\n\n" + + << "// Extract 10-bit value from 16-bit storage as normalized float\n" + << "highp float extract10BitNormalized(highp uint value) {\n" + << " highp uint raw10bit = value >> 6; // Shift right to remove 6 padding bits\n" + << " return float(raw10bit) / 1023.0; // Normalize to [0,1]\n" + << "}\n\n" + + << "// Normalize packed 10-bit YUV directly\n" + << "highp vec3 normalize10BitYUV(highp uvec3 packedYuv) {\n" + << " // Extract 10-bit components\n" + << " highp float y = extract10BitFrom16Bit(packedYuv.x);\n" + << " highp float cb = extract10BitFrom16Bit(packedYuv.y);\n" + << " highp float cr = extract10BitFrom16Bit(packedYuv.z);\n" + << " // Normalize components\n" + << " y = normalizeY(y);\n" + << " highp vec2 cbcr = normalizeCbCr(vec2(cb, cr));\n" + << " return vec3(y, cbcr);\n" + << "}\n\n"; + } else if (bitDepth == 12) { + shaderStr << "// STEP 4: Special 12-bit format handling functions\n" + << "// 12-bit packing formats often store values in uint16 or uint32 with specific bit layouts\n" + << "\n" + << "// Extract 12-bit value from 16-bit storage (common for P012, P212, etc.)\n" + << "highp float extract12BitFrom16Bit(highp uint value) {\n" + << " // Most 12-bit formats store the value in the most significant 12 bits\n" + << " highp uint raw12bit = value >> 4; // Shift right to remove 4 padding bits\n" + << " return float(raw12bit);\n" + << "}\n\n" + + << "// Extract 12-bit value from 16-bit storage as normalized float\n" + << "highp float extract12BitNormalized(highp uint value) {\n" + << " highp uint raw12bit = value >> 4; // Shift right to remove 4 padding bits\n" + << " return float(raw12bit) / 4095.0; // 
Normalize to [0,1]\n" + << "}\n\n" + + << "// Normalize packed 12-bit YUV directly\n" + << "highp vec3 normalize12BitYUV(highp uvec3 packedYuv) {\n" + << " // Extract 12-bit components\n" + << " highp float y = extract12BitFrom16Bit(packedYuv.x);\n" + << " highp float cb = extract12BitFrom16Bit(packedYuv.y);\n" + << " highp float cr = extract12BitFrom16Bit(packedYuv.z);\n" + << " // Normalize components\n" + << " y = normalizeY(y);\n" + << " highp vec2 cbcr = normalizeCbCr(vec2(cb, cr));\n" + << " return vec3(y, cbcr);\n" + << "}\n\n"; + } +} + +/** + * @brief Generates GLSL functions for YCbCr denormalization with different bit depths + * + * Creates helper functions to denormalize YCbCr values from normalized [0-1] for Y and + * [-0.5,0.5] for CbCr back to the appropriate bit depth and range (limited or full). + * This is the inverse operation of GenYCbCrNormalizationFuncs. + * + * Process steps: + * 1. Calculate denormalization parameters based on bit depth and range + * 2. Generate Y denormalization function (inverse scaling + offset) + * 3. Generate CbCr unshifting functions (recentering to [0,1]) + * 4. Generate CbCr denormalization functions (inverse scaling + offset) + * 5. Generate combined convenience functions + * 6. 
Generate bit-depth specific packing helpers for 10/12-bit formats + * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param bitDepth The target bit depth for the YCbCr data (8, 10, 12, or 16) + * @param isLimitedRange Whether target values are limited range (true) or full range (false) + * @param hasChroma Whether to include chroma denormalization functions + */ +static void GenYCbCrDeNormalizationFuncs(std::stringstream& shaderStr, + uint32_t bitDepth = 8, + bool isLimitedRange = true, + bool hasChroma = true) +{ + // STEP 1: Calculate denormalization parameters based on bit depth and range + // =========================================================================== + + // Use double precision for calculations to maintain precision + double maxValue = (1ULL << bitDepth) - 1.0; // Max value for the given bit depth + + // Limited range values for different bit depths + double yBlack, yWhite, cZero, cScale; + + if (isLimitedRange) { + // Step 1.1: Calculate limited range (aka TV/Video range) values + // Use standard-compliant values for different bit depths + switch (bitDepth) { + case 10: + // 10-bit limited range: Y[64,940], C[64,960] + yBlack = 64.0; + yWhite = 940.0; + cZero = 64.0; + cScale = 896.0; // 960 - 64 + break; + case 12: + // 12-bit limited range: Y[256,3760], C[256,3840] + yBlack = 256.0; + yWhite = 3760.0; + cZero = 256.0; + cScale = 3584.0; // 3840 - 256 + break; + case 16: + // 16-bit limited range: scale 8-bit values by 2^8 + yBlack = 16.0 * 256.0; + yWhite = 235.0 * 256.0; + cZero = 16.0 * 256.0; + cScale = 224.0 * 256.0; + break; + case 8: + default: + // 8-bit limited range: Y[16,235], C[16,240] + yBlack = 16.0; + yWhite = 235.0; + cZero = 16.0; + cScale = 224.0; + break; + } + } else { + // Step 1.2: Calculate full range values (same for all bit depths, just scaled) + yBlack = 0.0; + yWhite = maxValue; + cZero = 0.0; + cScale = maxValue; + } + + // Step 1.3: Calculate denormalization factors (inverse of 
normalization) + double yRange = yWhite - yBlack; + + // Format values with high precision for GLSL + std::stringstream ss; + ss.precision(16); // Use high precision for constants + + // STEP 2: Generate Y denormalization function + // =========================================================================== + shaderStr << "\n" + << "// Specify high precision for all floating point calculations\n" + << "precision highp float;\n" + << "precision highp int;\n" + << "\n" + << "// STEP 1: Denormalize Y component from [0.0, 1.0] back to " << bitDepth << "-bit " + << (isLimitedRange ? "limited range" : "full range") << " content\n" + << "highp float denormalizeY(highp float normalizedY) {\n"; + + if (isLimitedRange) { + // Step 2.1: Limited range needs scaling and black level adjustment + // Format with high precision + ss.str(""); + ss << std::fixed << yRange; + std::string yRangeStr = ss.str(); + + ss.str(""); + ss << std::fixed << yBlack; + std::string yBlackStr = ss.str(); + + shaderStr << " // Step 1.1: Map from [0.0, 1.0] back to [" << yBlack << ", " << yWhite << "]\n" + << " // Formula: Y = normalizedY * yRange + yBlack\n" + << " return normalizedY * " << yRangeStr << " + " << yBlackStr << ";\n"; + } else { + // Step 2.2: Full range just needs scaling + shaderStr << " // Step 1.1: Map from [0.0, 1.0] back to [0, " << maxValue << "]\n" + << " // Formula: Y = normalizedY * maxValue\n" + << " return normalizedY * " << maxValue << ";\n"; + } + shaderStr << "}\n\n"; + + if (hasChroma) { + // STEP 3: Generate CbCr unshifting function + // =========================================================================== + shaderStr << "// STEP 2: Unshift CbCr components from [-0.5, 0.5] range back to centered range [0.0, 1.0]\n" + << "highp vec2 unshiftCbCr(highp vec2 shiftedCbCr) {\n" + << " // Step 2.1: Shift from [-0.5, 0.5] back to [0.0, 1.0]\n" + << " return shiftedCbCr + 0.5;\n" + << "}\n\n"; + + // STEP 4: Generate CbCr denormalization function + // 
=========================================================================== + shaderStr << "// STEP 3: Denormalize CbCr components from [0.0, 1.0] back to " << bitDepth << "-bit " + << (isLimitedRange ? "limited range" : "full range") << " content\n" + << "highp vec2 denormalizeCbCr(highp vec2 normalizedCbCr) {\n"; + + if (isLimitedRange) { + // Step 4.1: Limited range needs scaling and zero level adjustment + // Format with high precision + ss.str(""); + ss << std::fixed << cScale; + std::string cScaleStr = ss.str(); + + ss.str(""); + ss << std::fixed << cZero; + std::string cZeroStr = ss.str(); + + shaderStr << " // Step 3.1: Map from [0.0, 1.0] back to [" << cZero << ", " << (cZero + cScale) << "]\n" + << " // Formula: CbCr = normalizedCbCr * cScale + cZero\n" + << " return normalizedCbCr * " << cScaleStr << " + " << cZeroStr << ";\n"; + } else { + // Step 4.2: Full range just needs scaling + shaderStr << " // Step 3.1: Map from [0.0, 1.0] back to [0, " << maxValue << "]\n" + << " // Formula: CbCr = normalizedCbCr * maxValue\n" + << " return normalizedCbCr * " << maxValue << ";\n"; + } + shaderStr << "}\n\n"; + + // STEP 5: Generate combined convenience functions + // =========================================================================== + + // Step 5.1: Combined unshift and denormalize + shaderStr << "// STEP 4: Combined function: unshift and denormalize CbCr in one step\n" + << "highp vec2 unshiftAndDenormalizeCbCr(highp vec2 shiftedCbCr) {\n" + << " // Step 4.1: First unshift from [-0.5, 0.5] to [0.0, 1.0], then denormalize\n" + << " return denormalizeCbCr(unshiftCbCr(shiftedCbCr));\n" + << "}\n\n"; + + // Step 5.2: Full YCbCr denormalization + shaderStr << "// STEP 5: Combined function to denormalize full YCbCr triplet\n" + << "highp vec3 denormalizeYCbCr(highp vec3 normalizedYCbCr) {\n" + << " // Step 5.1: Denormalize Y component\n" + << " highp float y = denormalizeY(normalizedYCbCr.x);\n" + << " // Step 5.2: Unshift and denormalize Cb and Cr 
components\n" + << " highp vec2 cbcr = denormalizeCbCr(vec2(normalizedYCbCr.y + 0.5, normalizedYCbCr.z + 0.5));\n" + << " // Step 5.3: Combine the components into a single vector\n" + << " return vec3(y, cbcr);\n" + << "}\n\n"; + } + + // STEP 6: Generate bit-depth specific packing helpers for 10/12-bit formats + // =========================================================================== + if (bitDepth == 10) { + shaderStr << "// STEP 6: Special 10-bit format packing functions\n" + << "// Pack 10-bit values into 16-bit storage (common for P010, P210, etc.)\n" + << "\n" + << "// Pack 10-bit value into 16-bit storage (MSB aligned with padding)\n" + << "highp uint pack10BitTo16Bit(highp float value) {\n" + << " // Clamp the input value to the valid range for 10-bit\n" + << " highp uint raw10bit = uint(clamp(value, 0.0, 1023.0));\n" + << " // Shift left by 6 bits to store in MSB format (standard for P010, etc.)\n" + << " return raw10bit << 6;\n" + << "}\n\n" + + << "// Pack normalized [0,1] value into 10-bit MSB aligned format\n" + << "highp uint packNormalizedTo10Bit(highp float normalizedValue) {\n" + << " // Scale to 10-bit range and pack\n" + << " highp uint raw10bit = uint(clamp(normalizedValue * 1023.0, 0.0, 1023.0));\n" + << " return raw10bit << 6;\n" + << "}\n\n" + + << "// Pack denormalized YUV to 10-bit values\n" + << "highp uvec3 packYUVTo10Bit(highp vec3 yuv) {\n" + << " // Denormalize components first\n" + << " highp vec3 denormYuv = denormalizeYCbCr(yuv);\n" + << " // Pack each component into 16-bit storage (MSB aligned)\n" + << " return uvec3(\n" + << " pack10BitTo16Bit(denormYuv.x), // Y\n" + << " pack10BitTo16Bit(denormYuv.y), // Cb\n" + << " pack10BitTo16Bit(denormYuv.z) // Cr\n" + << " );\n" + << "}\n\n"; + } else if (bitDepth == 12) { + shaderStr << "// STEP 6: Special 12-bit format packing functions\n" + << "// Pack 12-bit values into 16-bit storage (common for P012, P212, etc.)\n" + << "\n" + << "// Pack 12-bit value into 16-bit storage (MSB 
aligned with padding)\n" + << "highp uint pack12BitTo16Bit(highp float value) {\n" + << " // Clamp the input value to the valid range for 12-bit\n" + << " highp uint raw12bit = uint(clamp(value, 0.0, 4095.0));\n" + << " // Shift left by 4 bits to store in MSB format (standard for P012, etc.)\n" + << " return raw12bit << 4;\n" + << "}\n\n" + + << "// Pack normalized [0,1] value into 12-bit MSB aligned format\n" + << "highp uint packNormalizedTo12Bit(highp float normalizedValue) {\n" + << " // Scale to 12-bit range and pack\n" + << " highp uint raw12bit = uint(clamp(normalizedValue * 4095.0, 0.0, 4095.0));\n" + << " return raw12bit << 4;\n" + << "}\n\n" + + << "// Pack denormalized YUV to 12-bit values\n" + << "highp uvec3 packYUVTo12Bit(highp vec3 yuv) {\n" + << " // Denormalize components first\n" + << " highp vec3 denormYuv = denormalizeYCbCr(yuv);\n" + << " // Pack each component into 16-bit storage (MSB aligned)\n" + << " return uvec3(\n" + << " pack12BitTo16Bit(denormYuv.x), // Y\n" + << " pack12BitTo16Bit(denormYuv.y), // Cb\n" + << " pack12BitTo16Bit(denormYuv.z) // Cr\n" + << " );\n" + << "}\n\n"; + } +} + +/** + * @brief Generates GLSL function for YCbCr format conversion with normalization and denormalization + * + * Creates a helper function for converting between different YCbCr formats + * that normalizes input values, then denormalizes to the target format. + * This handles both bit-depth and range conversions. 
+ * + * @param shaderStr Output stringstream where the GLSL code will be written + * @param inputBitDepth The bit depth of input YCbCr data (8, 10, 12, or 16 bits) + * @param outputBitDepth The bit depth of output YCbCr data (8, 10, 12, or 16 bits) + * @param isInputLimitedRange Whether the input uses limited range (true) or full range (false) + * @param isOutputLimitedRange Whether the output uses limited range (true) or full range (false) + */ +static void GenConvertYCbCrFormat(std::stringstream& shaderStr, + uint32_t inputBitDepth = 8, + uint32_t outputBitDepth = 8, + bool isInputLimitedRange = true, + bool isOutputLimitedRange = true) +{ + shaderStr << + "// Function to handle YCbCr format conversion with proper normalization\n" + "vec3 convertYCbCrFormat(vec3 YCbCrRawIn) {\n" + " // Step 1: Normalize input YCbCr values to [0-1] range\n" + " float normalizedY = normalizeY(YCbCrRawIn.x);\n" + " vec2 normalizedCbCr = normalizeCbCr(vec2(YCbCrRawIn.y, YCbCrRawIn.z));\n\n" + " // Step 2: Denormalize to output bit depth and range\n" + " float y = denormalizeY(normalizedY);\n" + " vec2 cbcr = denormalizeCbCr(normalizedCbCr);\n\n" + " // Return the converted values\n" + " return vec3(y, cbcr.x, cbcr.y);\n" + "}\n\n"; } size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader) @@ -307,56 +1845,45 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader) // Create compute pipeline std::stringstream shaderStr; + + // 1. Generate header and push constants GenHeaderAndPushConst(shaderStr); + + // 2. 
Generate IO bindings // Input image - shaderStr << " // The input YCbCr image binding\n"; + shaderStr << " // The input YCbCr input binding\n"; + // Input Descriptors ShaderGeneratePlaneDescriptors(shaderStr, - m_inputImageAspects, - "inputImage", - m_inputFormat, true, // isInput 0, // startBinding 0, // set - true // imageArray - ); - - // Output image - shaderStr << " // The output RGBA image binding\n"; - ShaderGeneratePlaneDescriptors(shaderStr, - m_outputImageAspects, - "outputImage", - m_outputFormat, - false, // isInput - 4, // startBinding - 0, // set - true // imageArray - ); - - shaderStr << "\n" - " // TODO: normalize only narrow\n" - "float normalizeY(float Y) {\n" - " // return (Y - (16.0 / 255.0)) * (255.0 / (235.0 - 16.0));\n" - " return (Y - 0.0627451) * 1.164383562;\n" - "}\n" - "\n" - "vec2 shiftCbCr(vec2 CbCr) {\n" - " return CbCr - 0.5;\n" - "}\n" - "\n" - "vec3 shiftCbCr(vec3 ycbcr) {\n" - " const vec3 shiftCbCr = vec3(0.0, -0.5, -0.5);\n" - " return ycbcr + shiftCbCr;\n" - "}\n" - "\n" - " // TODO: normalize only narrow\n" - "vec2 normalizeCbCr(vec2 CbCr) {\n" - " // return (CbCr - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0);\n" - " return (CbCr - 0.0627451) * 1.138392857;\n" - "}\n" - "\n"; - - const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo = m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo(); - const VkMpFormatInfo * mpInfo = YcbcrVkFormatInfo(samplerYcbcrConversionCreateInfo.format); + true, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + + shaderStr << " // The output RGBA image binding\n"; + // Output Descriptors + ShaderGeneratePlaneDescriptors(shaderStr, + false, // isInput + 4, // startBinding + 0, // set + true, // imageArray + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + + // Get format information to determine bit depth + const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo = + m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo(); + const VkMpFormatInfo* mpInfo = 
YcbcrVkFormatInfo(samplerYcbcrConversionCreateInfo.format); + + // Determine bit depth from the format + uint32_t bitDepth = mpInfo ? GetBitsPerChannel(mpInfo->planesLayout) : 8; + + // Determine if we're using limited or full range + bool isLimitedRange = (samplerYcbcrConversionCreateInfo.ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW); + + // 3. Generate helper functions for YCbCr normalization with proper bit depth handling + GenYCbCrNormalizationFuncs(shaderStr, bitDepth, isLimitedRange, true); + + // 4. Generate YCbCr to RGB conversion function const unsigned int bpp = (8 + mpInfo->planesLayout.bpp * 2); const YcbcrBtStandard btStandard = GetYcbcrPrimariesConstantsId(samplerYcbcrConversionCreateInfo.ycbcrModel); @@ -367,7 +1894,6 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader) rangeConstants.cbMax, rangeConstants.crMax); - shaderStr << "vec3 convertYCbCrToRgb(vec3 yuv) {\n" " vec3 rgb;\n"; @@ -377,7 +1903,7 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader) "}\n" "\n"; - + // 5. Generate color range normalization function YcbcrNormalizeColorRange yCbCrNormalizeColorRange(bpp, (samplerYcbcrConversionCreateInfo.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) ? YCBCR_COLOR_RANGE_NATURAL : (YCBCR_COLOR_RANGE)samplerYcbcrConversionCreateInfo.ycbcrRange); @@ -390,21 +1916,51 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader) "}\n" "\n"; + // 6. Generate function to fetch YCbCr components from images + shaderStr << + "vec3 fetchYCbCrFromImage(ivec3 pos) {\n" + " // Fetch from the texture.\n" + " float Y = imageLoad(inputImageY, pos).r;\n" + " // For subsampled formats, divide by 2\n" + " vec2 CbCr = imageLoad(inputImageCbCr, ivec3(pos.xy/2, pos.z)).rg;\n" + " return vec3(Y, CbCr);\n" + "}\n" + "\n"; + + // 7. 
Generate function to write RGBA to output image + shaderStr << + "void writeRgbaToImage(vec4 rgba, ivec3 pos) {\n" + " imageStore(outputImageRGB, pos, rgba);\n" + "}\n" + "\n"; + + // 8. Main function shaderStr << "void main()\n" "{\n"; + + // 9. Handle position calculation GenHandleImagePosition(shaderStr); + + // 10. Calculate source position with replication if enabled GenHandleSourcePositionWithReplicate(shaderStr, m_enableRowAndColumnReplication); + + // 11. YCbCr to RGB conversion shaderStr << - " // Fetch from the texture.\n" - " float Y = imageLoad(inputImageY, ivec3(srcPos, pushConstants.srcImageLayer)).r;\n" - " // TODO: it is /2 only for sub-sampled formats\n" - " vec2 CbCr = imageLoad(inputImageCbCr, ivec3(srcPos/2, pushConstants.srcImageLayer)).rg;\n" + " // Calculate position with layer\n" + " ivec3 srcPos3D = ivec3(srcPos, pushConstants.srcLayer);\n" + " ivec3 dstPos3D = ivec3(pos, pushConstants.dstLayer);\n" + "\n" + " // Fetch YCbCr components\n" + " vec3 ycbcr = fetchYCbCrFromImage(srcPos3D);\n" + "\n" + " // Process: normalize, shift, and convert to RGB\n" + " ycbcr = shiftCbCr(normalizeYCbCr(ycbcr));\n" + " vec3 rgb = convertYCbCrToRgb(ycbcr);\n" "\n" - " vec3 ycbcr = shiftCbCr(normalizeYCbCr(vec3(Y, CbCr)));\n" - " vec4 rgba = vec4(convertYCbCrToRgb(ycbcr),1.0);\n" - " // Store it back.\n" - " imageStore(outputImageRGB, ivec3(pos, pushConstants.dstImageLayer), rgba);\n" + " // Write final RGBA result\n" + " vec4 rgba = vec4(rgb, 1.0);\n" + " writeRgbaToImage(rgba, dstPos3D);\n" "}\n"; computeShader = shaderStr.str(); @@ -429,51 +1985,171 @@ size_t VulkanFilterYuvCompute::InitYCBCRCOPY(std::string& computeShader) // 3-planar: Cb (R) binding = 6 // 3-planar: Cr (R) binding = 7 + // Get format information to determine bit depths + const VkMpFormatInfo* inputMpInfo = YcbcrVkFormatInfo(m_inputFormat); + const VkMpFormatInfo* outputMpInfo = YcbcrVkFormatInfo(m_outputFormat); + + // Determine bit depth from the formats + const uint32_t inputBitDepth = 
inputMpInfo ? GetBitsPerChannel(inputMpInfo->planesLayout) : 8; + const uint32_t outputBitDepth = outputMpInfo ? GetBitsPerChannel(outputMpInfo->planesLayout) : 8; + + // Determine if we're using limited or full range for input and output + // Default to limited range as it's more common for YCbCr content + const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo = + m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo(); + const bool isInputLimitedRange = (samplerYcbcrConversionCreateInfo.ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW); + const bool isOutputLimitedRange = isInputLimitedRange; // Usually same as input, but could be configurable + + // Check if input or output are buffers + const bool isInputBuffer = m_inputIsBuffer; + const bool isOutputBuffer = m_outputIsBuffer; + + // Check if we need to do any bit depth conversion + const bool needsBitDepthConversion = (inputBitDepth != outputBitDepth); + + // Check if we need to do any range conversion + const bool needsRangeConversion = (isInputLimitedRange != isOutputLimitedRange); + std::stringstream shaderStr; + + // 1. Generate header and push constants GenHeaderAndPushConst(shaderStr); - // Input image - shaderStr << " // The input image binding\n"; + + // 2. 
Generate IO bindings + // Input Descriptors ShaderGeneratePlaneDescriptors(shaderStr, - m_inputImageAspects, - "inputImage", - m_inputFormat, true, // isInput 0, // startBinding 0, // set - true // imageArray - ); + true, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - // Output image - shaderStr << " // The output image binding\n"; + // Output Descriptors ShaderGeneratePlaneDescriptors(shaderStr, - m_outputImageAspects, - "outputImage", - m_outputFormat, false, // isInput 4, // startBinding 0, // set - true // imageArray - ); + true, // imageArray + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + shaderStr << "\n\n"; + // Determine input and output plane configurations + const bool hasInputChroma = (m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0; + const bool hasOutputChroma = (m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0; + + // Determine if input is two-plane (e.g., NV12) or three-plane (e.g., I420) + const bool isInputTwoPlane = (m_inputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) && + !(m_inputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT); + + // Determine if output is two-plane (e.g., NV12) or three-plane (e.g., I420) + const bool isOutputTwoPlane = (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) && + !(m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT); + + // 3. Add any bit depth handling functions needed + if (isInputBuffer && inputBitDepth > 8) { + bool isMSB = true; // Default to MSB-aligned (most common case) + GenExtractHighBitDepthFunc(shaderStr, isMSB, inputBitDepth); + } + + // 4. Add buffer read/write functions if needed + if (isInputBuffer) { + // Add fetch functions for Y and CbCr from buffer + GenFetchYFromBufferFunc(shaderStr, inputBitDepth > 8, inputBitDepth); + GenFetchCbCrFromBufferFunc(shaderStr, inputBitDepth > 8, inputBitDepth); + } + + // 5. 
Add YCbCr normalization and denormalization functions for bit depth conversion + if (needsBitDepthConversion || needsRangeConversion) { + // Generate normalization functions for input format + GenYCbCrNormalizationFuncs(shaderStr, inputBitDepth, isInputLimitedRange, hasInputChroma); + + // Generate denormalization functions for output format + GenYCbCrDeNormalizationFuncs(shaderStr, outputBitDepth, isOutputLimitedRange, hasOutputChroma); + } + + // 6. Generate the read function for YCbCr data + GenReadYCbCrBuffer(shaderStr, isInputBuffer, inputBitDepth, isInputTwoPlane, m_inputEnableMsbToLsbShift, m_inputImageAspects); + + // 7. Generate the write function for YCbCr data + GenWriteYCbCrBuffer(shaderStr, isOutputBuffer, outputBitDepth, isOutputTwoPlane, m_outputEnableLsbToMsbShift, m_outputImageAspects); + + // 8. Helper function for combined normalization and denormalization + if (needsBitDepthConversion || needsRangeConversion) { + GenConvertYCbCrFormat(shaderStr, inputBitDepth, outputBitDepth, isInputLimitedRange, isOutputLimitedRange); + } + + // 9. Main function shaderStr << "void main()\n" "{\n"; - GenHandleImagePosition(shaderStr); + + // 10. Handle position calculation + if (isInputBuffer || isOutputBuffer) { + // Use buffer position calculation + GenHandleBufferPosition(shaderStr); + } else { + // Use image position calculation + GenHandleImagePosition(shaderStr); + } + + // 11. Calculate source position with replication if enabled GenHandleSourcePositionWithReplicate(shaderStr, m_enableRowAndColumnReplication); + + // 12. Handle YCbCr processing + + // For inputs with chroma, we need to handle subsampling + // Get subsampling ratios for input format + const uint32_t chromaHorzRatio = (inputMpInfo != nullptr) ? (1 << inputMpInfo->planesLayout.secondaryPlaneSubsampledX) : 1; + const uint32_t chromaVertRatio = (inputMpInfo != nullptr) ? 
(1 << inputMpInfo->planesLayout.secondaryPlaneSubsampledY) : 1; + + // Generate condition for chroma processing based on actual subsampling shaderStr << - " // Read Y value from source Y plane and write it to destination Y plane\n" - " float Y = imageLoad(inputImageY, ivec3(srcPos, pushConstants.srcImageLayer)).r;\n" - " imageStore(outputImageY, ivec3(pos, pushConstants.dstImageLayer), vec4(Y, 0, 0, 1));\n" - "\n" - " // Do the same for the CbCr plane, but remember about the 4:2:0 subsampling\n" - " if (srcPos % 2 == ivec2(0, 0)) {\n" - " srcPos /= 2;\n" - " pos /= 2;\n" - " vec2 CbCr = imageLoad(inputImageCbCr, ivec3(srcPos, pushConstants.srcImageLayer)).rg;\n" - " imageStore(outputImageCbCr, ivec3(pos, pushConstants.dstImageLayer), vec4(CbCr, 0, 1));\n" - " }\n" - "}\n"; + " // Handle proper subsampling based on format (" << + (chromaHorzRatio == 2 ? (chromaVertRatio == 2 ? "4:2:0" : "4:2:2") : "4:4:4") << ")\n"; + + // Generate the chroma position condition with a boolean variable + GenHandleChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, true, "pos", "processChroma"); + + // Initialize chroma positions with default values + shaderStr << " // Initialize chroma positions\n" + << " ivec2 chromaSrcPos = srcPos;\n" + << " ivec2 chromaPos = pos;\n\n" + << " // Check if we need to process chroma\n" + << " if (processChroma) {\n"; + + // Generate chroma position calculations for source position + GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "srcPos", "chromaSrcPos", 8); + + // Generate chroma position calculations for destination position + GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "pos", "chromaPos", 8); + + shaderStr << " }\n"; + + // Read YCbCr data using the helper function + shaderStr << "\n" + << " // Read YCbCr data from source\n" + << " vec3 YCbCrRawIn = readYCbCrFromSource(srcPos, chromaSrcPos, pushConstants.srcLayer, processChroma);\n\n"; + + // Process the data based on whether we need 
conversion + if (needsBitDepthConversion || needsRangeConversion) { + shaderStr << + " // Need format conversion - normalize and denormalize\n" + " vec3 YCbCrRawOut = convertYCbCrFormat(YCbCrRawIn);\n\n"; + } else { + shaderStr << + " // No format conversion needed - direct copy\n" + " vec3 YCbCrRawOut = YCbCrRawIn;\n\n"; + } + + // Write the processed data using the helper function + shaderStr << + " // Write processed data to destination\n" + " writeYCbCrToDestination(YCbCrRawOut, pos, chromaPos, pushConstants.dstLayer, processChroma);\n" + "\n\n"; + + // Close the main function + shaderStr << "}\n"; computeShader = shaderStr.str(); if (dumpShaders) @@ -495,37 +2171,849 @@ size_t VulkanFilterYuvCompute::InitYCBCRCLEAR(std::string& computeShader) // Create compute pipeline std::stringstream shaderStr; + + // 1. Generate header and push constants GenHeaderAndPushConst(shaderStr); - // Output image - shaderStr << " // The output image binding\n"; + // 2. Generate output image bindings + shaderStr << " // The output descriptors binding\n"; + // Output Descriptors ShaderGeneratePlaneDescriptors(shaderStr, - m_outputImageAspects, - "outputImage", - m_outputFormat, false, // isInput 4, // startBinding 0, // set - true // imageArray - ); + true, // imageArray + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); shaderStr << "\n\n"; + // Get format information to determine subsampling ratios + const VkMpFormatInfo* outputMpInfo = YcbcrVkFormatInfo(m_outputFormat); + // Get subsampling ratios for output format + const uint32_t chromaHorzRatio = (outputMpInfo != nullptr) ? (1 << outputMpInfo->planesLayout.secondaryPlaneSubsampledX) : 1; + const uint32_t chromaVertRatio = (outputMpInfo != nullptr) ? (1 << outputMpInfo->planesLayout.secondaryPlaneSubsampledY) : 1; + + + // 3. Main function shaderStr << "void main()\n" "{\n"; + + // 4. Handle position calculation GenHandleImagePosition(shaderStr); + + // 5. 
Clear operations for Y plane shaderStr << - " imageStore(outputImageY, ivec3(pos, pushConstants.dstImageLayer), vec4(0.5, 0, 0, 1));\n" - "\n" - " // Do the same for the CbCr plane, but remember about the 4:2:0 subsampling\n" - " if (pos % 2 == ivec2(0, 0)) {\n" - " pos /= 2;\n" - " imageStore(outputImageCbCr, ivec3(pos, pushConstants.dstImageLayer), vec4(0.5, 0.5, 0.0, 1.0));\n" - " }\n" - "}\n"; + " // Clear Y plane with 50% intensity\n" + " imageStore(outputImageY, ivec3(pos, pushConstants.dstLayer), vec4(0.5, 0, 0, 1));\n" + "\n"; + + // Handle CbCr plane clearing based on format's subsampling + shaderStr << + " // Clear CbCr plane with " << + (chromaHorzRatio == 2 ? (chromaVertRatio == 2 ? "4:2:0" : "4:2:2") : "4:4:4") << + " subsampling\n"; + + // Generate a boolean to track whether this position needs chroma clearing + GenHandleChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, true, "pos", "shouldClearChroma"); + + // Handle position for chroma planes + shaderStr << " ivec2 chromaPos = pos;\n"; + shaderStr << " if (shouldClearChroma) {\n"; + + // Calculate chroma position if necessary + GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "pos", "chromaPos", 8); + + // For 2-plane format, output CbCr together + if (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) { + shaderStr << " // Clear CbCr plane with 50% intensity (middle range)\n" + << " imageStore(outputImageCbCr, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.5, 0.0, 1.0));\n"; + } + + // For 3-plane format, handle Cb and Cr separately + if (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) { + shaderStr << " // Clear separate Cb and Cr planes with 50% intensity (middle range)\n" + << " imageStore(outputImageCb, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.0, 0.0, 1.0));\n" + << " imageStore(outputImageCr, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.0, 0.0, 1.0));\n"; + } + + shaderStr << " }\n" + << "}\n"; computeShader = shaderStr.str(); 
if (dumpShaders) std::cout << "\nCompute Shader:\n" << computeShader; return computeShader.size(); } + +uint32_t VulkanFilterYuvCompute::GetPlaneIndex(VkImageAspectFlagBits planeAspect) { + + // Returns index 0 for VK_IMAGE_ASPECT_COLOR_BIT and VK_IMAGE_ASPECT_PLANE_0_BIT + // Returns index 1 for VK_IMAGE_ASPECT_PLANE_1_BIT + // Returns index 2 for VK_IMAGE_ASPECT_PLANE_2_BIT + + // First, verify it's a plane aspect bit + assert(planeAspect & validAspects); + + if (planeAspect & VK_IMAGE_ASPECT_COLOR_BIT) { + return 0; + } + + // Alternatively, without intrinsics: + return (planeAspect & VK_IMAGE_ASPECT_PLANE_0_BIT) ? 0 : + (planeAspect & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 1 : 2; +} + +uint32_t VulkanFilterYuvCompute::UpdateBufferDescriptorSets( + const VkBuffer* vkBuffers, + uint32_t numVkBuffers, + const VkSubresourceLayout* vkBufferSubresourceLayout, + uint32_t numPlanes, + VkImageAspectFlags validImageAspects, + uint32_t& descrIndex, + uint32_t& baseBinding, + VkDescriptorType descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr], + std::array& writeDescriptorSets, + const uint32_t maxDescriptors) +{ + + validImageAspects &= validAspects; + uint32_t curImageAspect = 0; + uint32_t bufferIndex = 0; + while(validImageAspects) { + + if (validImageAspects & (VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect) ) { + + uint32_t planeNum = GetPlaneIndex((VkImageAspectFlagBits)(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect)); + uint32_t dstBinding = baseBinding; + if (curImageAspect > 0) { + // the first plane is 1, second plane is 2, the 3rd is 3 + dstBinding += (1 + planeNum); + } + + writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; + writeDescriptorSets[descrIndex].dstBinding = dstBinding; + writeDescriptorSets[descrIndex].descriptorCount = 1; + writeDescriptorSets[descrIndex].descriptorType = descriptorType; + + 
bufferDescriptors[descrIndex].buffer = vkBuffers[bufferIndex]; + bufferDescriptors[descrIndex].offset = vkBufferSubresourceLayout[planeNum].offset; + bufferDescriptors[descrIndex].range = vkBufferSubresourceLayout[planeNum].arrayPitch; + writeDescriptorSets[descrIndex].pBufferInfo = &bufferDescriptors[descrIndex]; + descrIndex++; + validImageAspects &= ~(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect); + bufferIndex = std::min(numVkBuffers - 1, bufferIndex + 1); + } + + curImageAspect++; + } + assert(descrIndex <= maxDescriptors); + return descrIndex; +} + +uint32_t VulkanFilterYuvCompute::UpdateImageDescriptorSets( + const VkImageResourceView* imageView, + VkImageAspectFlags validImageAspects, + VkSampler convSampler, + VkImageLayout imageLayout, + uint32_t& descrIndex, + uint32_t& baseBinding, + VkDescriptorType descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr], + std::array& writeDescriptorSets, + const uint32_t maxDescriptors) +{ + + validImageAspects &= validAspects; + uint32_t curImageAspect = 0; + const uint32_t numPlanes = imageView->GetNumberOfPlanes(); + while(validImageAspects) { + + if (validImageAspects & (VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect) ) { + + VkSampler ccSampler = (curImageAspect == 0) ? convSampler : VK_NULL_HANDLE; + uint32_t planeNum = GetPlaneIndex((VkImageAspectFlagBits)(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect)); + assert(planeNum < numPlanes); + uint32_t dstBinding = baseBinding; + if (curImageAspect > 0) { + // the first plane is 1, second plane is 2, the 3rd is 3 + dstBinding += (1 + planeNum); + } + + writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; + writeDescriptorSets[descrIndex].dstBinding = dstBinding; + writeDescriptorSets[descrIndex].descriptorCount = 1; + writeDescriptorSets[descrIndex].descriptorType = (ccSampler != VK_NULL_HANDLE) ? 
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER : + descriptorType; + imageDescriptors[descrIndex].sampler = ccSampler; + imageDescriptors[descrIndex].imageView = (curImageAspect == 0) ? + imageView->GetImageView() : + imageView->GetPlaneImageView(planeNum); + assert(imageDescriptors[descrIndex].imageView); + imageDescriptors[descrIndex].imageLayout = imageLayout; + writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // Y (0) plane + descrIndex++; + validImageAspects &= ~(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect); + } + + curImageAspect++; + } + assert(descrIndex <= maxDescriptors); + return descrIndex; +} + +// Image input -> Image output +VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, + const VkImageResourceView* inImageView, + const VkVideoPictureResourceInfoKHR * inImageResourceInfo, + const VkImageResourceView* outImageView, + const VkVideoPictureResourceInfoKHR * outImageResourceInfo, + uint32_t bufferIdx) +{ + + assert(cmdBuf != VK_NULL_HANDLE); + + m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline()); + + VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode(); + + switch (layoutMode) { + case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR: + case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT: + { + + VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr]{}; + std::array writeDescriptorSets{}; + + // Images + uint32_t set = 0; + uint32_t descrIndex = 0; + uint32_t dstBinding = 0; + + // IN 0: RGBA color converted by an YCbCr sample + // IN 1: y plane - G -> R8 + // IN 2: Cb or Cr or CbCr plane - BR -> R8B8 + // IN 3: Cr or Cb plane - R -> R8 + UpdateImageDescriptorSets(inImageView, + m_inputImageAspects, + m_samplerYcbcrConversion.GetSampler(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + descrIndex, + dstBinding, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + 
imageDescriptors, + writeDescriptorSets, + maxNumComputeDescr / 2 /* max descriptors */); + + dstBinding = 4; + // OUT 4: Out RGBA or single planar YCbCr image + // OUT 5: y plane - G -> R8 + // OUT 6: Cb or Cr or CbCr plane - BR -> R8B8 + // OUT 7: Cr or Cb plane - R -> R8 + UpdateImageDescriptorSets(outImageView, + m_outputImageAspects, + VK_NULL_HANDLE, + VK_IMAGE_LAYOUT_GENERAL, + descrIndex, + dstBinding, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + imageDescriptors, + writeDescriptorSets, + maxNumComputeDescr /* max descriptors */); + + assert(descrIndex <= maxNumComputeDescr); + assert(descrIndex >= 2); + + if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) { + m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, descrIndex, writeDescriptorSets.data()); + } else { + + VkDeviceOrHostAddressConstKHR imageDescriptorBufferDeviceAddress = + m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx, + set, + descrIndex, + writeDescriptorSets.data()); + + + // Descriptor buffer bindings + // Set 0 = Image + VkDescriptorBufferBindingInfoEXT bindingInfo{}; + bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + bindingInfo.pNext = nullptr; + bindingInfo.address = imageDescriptorBufferDeviceAddress.deviceAddress; + bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; + m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo); + + // Image (set 0) + uint32_t bufferIndexImage = 0; + VkDeviceSize bufferOffset = 0; + m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, 1, &bufferIndexImage, &bufferOffset); + } + } + break; + + default: + m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + 0, 1, 
m_descriptorSetLayout.GetDescriptorSet(), 0, 0); + } + + struct ivec2 { + uint32_t width; + uint32_t height; + + ivec2() : width(0), height(0) {} + ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} + }; + + struct PushConstants { + uint32_t srcLayer; + uint32_t dstLayer; + ivec2 inputSize; + ivec2 outputSize; + uint32_t yOffset; // Y plane offset + uint32_t cbOffset; // Cb plane offset + uint32_t crOffset; // Cr plane offset + uint32_t yPitch; // Y plane pitch + uint32_t cbPitch; // Cb plane pitch + uint32_t crPitch; // Cr plane pitch + }; + + PushConstants pushConstants = { + inImageResourceInfo->baseArrayLayer, // Set the source layer index + outImageResourceInfo->baseArrayLayer, // Set the destination layer index + ivec2(inImageResourceInfo->codedExtent.width, inImageResourceInfo->codedExtent.height), + ivec2(outImageResourceInfo->codedExtent.width, outImageResourceInfo->codedExtent.height), + 0, // yOffset - not used for image input + 0, // cbOffset - not used for image input + 0, // crOffset - not used for image input + 0, // yPitch - not used for image input + 0, // cbPitch - not used for image input + 0 // crPitch - not used for image input + }; + + m_vkDevCtx->CmdPushConstants(cmdBuf, + m_descriptorSetLayout.GetPipelineLayout(), + VK_SHADER_STAGE_COMPUTE_BIT, + 0, + sizeof(PushConstants), + &pushConstants); + + const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; + const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY; + m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1); + + return VK_SUCCESS; +} + +// Buffer input -> Image output +VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, + const VkBuffer* inBuffers, + uint32_t numInBuffers, + const VkFormat* inBufferFormats, + const VkSubresourceLayout* inBufferSubresourceLayouts, + uint32_t inBufferNumPlanes, + const 
VkImageResourceView* outImageView,
+                                                     const VkVideoPictureResourceInfoKHR* outImageResourceInfo,
+                                                     const VkBufferImageCopy* pBufferImageCopy,
+                                                     uint32_t bufferIdx)
+{
+    assert(cmdBuf != VK_NULL_HANDLE);
+    assert(m_inputIsBuffer == true);
+    assert(m_outputIsBuffer == false);
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+    {
+        VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr / 2]{};
+        VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr / 2]{};
+        std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+        uint32_t set = 0;
+        uint32_t descrIndex = 0;
+        uint32_t dstBinding = 0;
+
+        // Buffer input handling
+        // IN 0: Single buffer YCbCr
+        // IN 1: Y plane buffer
+        // IN 2: Cb, Cr or CbCr plane buffer
+        // IN 3: Cr plane buffer
+        UpdateBufferDescriptorSets(inBuffers, numInBuffers,
+                                   inBufferSubresourceLayouts, inBufferNumPlanes,
+                                   m_inputImageAspects,
+                                   descrIndex, dstBinding,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                   bufferDescriptors,
+                                   writeDescriptorSets,
+                                   maxNumComputeDescr / 2);
+
+
+        // Image output
+        dstBinding = 4;
+        // OUT 4: Out RGBA or single planar YCbCr image
+        // OUT 5: y plane - G -> R8
+        // OUT 6: Cb or Cr or CbCr plane - BR -> R8B8
+        // OUT 7: Cr or Cb plane - R -> R8
+        UpdateImageDescriptorSets(outImageView,
+                                  m_outputImageAspects,
+                                  VK_NULL_HANDLE,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  descrIndex,
+                                  dstBinding,
+                                  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                  imageDescriptors,
+                                  writeDescriptorSets,
+                                  maxNumComputeDescr /* max descriptors */);
+
+        assert(descrIndex <= maxNumComputeDescr);
+        assert(descrIndex >= 2);
+
+        if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+            m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, 
VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, descrIndex, writeDescriptorSets.data()); + } else { + VkDeviceOrHostAddressConstKHR descriptorBufferDeviceAddress = + m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx, + set, + descrIndex, + writeDescriptorSets.data()); + + + // Descriptor buffer bindings + VkDescriptorBufferBindingInfoEXT bindingInfo{}; + bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + bindingInfo.pNext = nullptr; + bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress; + bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; + m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo); + + uint32_t bufferIndexImage = 0; + VkDeviceSize bufferOffset = 0; + m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, 1, &bufferIndexImage, &bufferOffset); + } + } + break; + + default: + m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + 0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0); + } + + struct ivec2 { + uint32_t width; + uint32_t height; + + ivec2() : width(0), height(0) {} + ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} + }; + + struct PushConstants { + uint32_t srcLayer; + uint32_t dstLayer; + ivec2 inputSize; + ivec2 outputSize; + uint32_t yOffset; // Y plane offset + uint32_t cbOffset; // Cb plane offset + uint32_t crOffset; // Cr plane offset + uint32_t yPitch; // Y plane pitch + uint32_t cbPitch; // Cb plane pitch + uint32_t crPitch; // Cr plane pitch + }; + + uint32_t width, height; + uint32_t rowPitch; + + assert(pBufferImageCopy); + width = pBufferImageCopy->bufferRowLength > 0 ? 
+                pBufferImageCopy->bufferRowLength :
+                pBufferImageCopy->imageExtent.width;
+    height = pBufferImageCopy->bufferImageHeight > 0 ?
+                 pBufferImageCopy->bufferImageHeight :
+                 pBufferImageCopy->imageExtent.height;
+    rowPitch = width;
+
+    VkExtent3D outputExtent = outImageView->GetImageResource()->GetImageCreateInfo().extent;
+
+    VkDeviceSize planeSize = width * height;
+    VkDeviceSize yOffset = pBufferImageCopy ? pBufferImageCopy->bufferOffset : 0;
+    VkDeviceSize cbOffset = yOffset + planeSize;
+    VkDeviceSize crOffset = cbOffset + (planeSize / 4);
+
+    PushConstants pushConstants = {
+        pBufferImageCopy->imageSubresource.baseArrayLayer,
+        outImageResourceInfo->baseArrayLayer,
+        ivec2(width, height),
+        ivec2(outputExtent.width, outputExtent.height),
+        static_cast<uint32_t>(yOffset),
+        static_cast<uint32_t>(cbOffset),
+        static_cast<uint32_t>(crOffset),
+        rowPitch,
+        rowPitch / 2, // For 4:2:0 format
+        rowPitch / 2  // For 4:2:0 format
+    };
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf,
+                                 m_descriptorSetLayout.GetPipelineLayout(),
+                                 VK_SHADER_STAGE_COMPUTE_BIT,
+                                 0,
+                                 sizeof(PushConstants),
+                                 &pushConstants);
+
+    const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
+
+    return VK_SUCCESS;
+}
+
+// Image input -> Buffer output
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkImageResourceView* inImageView,
+                                                     const VkVideoPictureResourceInfoKHR* inImageResourceInfo,
+                                                     const VkBuffer* outBuffers, // with size numOutBuffers
+                                                     uint32_t numOutBuffers,
+                                                     const VkFormat* outBufferFormats, // with size outBufferNumPlanes
+                                                     const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes
+                                                     uint32_t outBufferNumPlanes,
+                                                     const VkBufferImageCopy* pBufferImageCopy,
+                                                     uint32_t bufferIdx)
+{
+    assert(cmdBuf != VK_NULL_HANDLE);
+    
assert(m_inputIsBuffer == false);
+    assert(m_outputIsBuffer == true);
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+    {
+        VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr / 2]{};
+        VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr / 2]{};
+        std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+        uint32_t set = 0;
+        uint32_t descrIndex = 0;
+        uint32_t dstBinding = 0;
+
+        // IN 0: RGBA color converted by an YCbCr sample
+        // IN 1: y plane - G -> R8
+        // IN 2: Cb or Cr or CbCr plane - BR -> R8B8
+        // IN 3: Cr or Cb plane - R -> R8
+        UpdateImageDescriptorSets(inImageView,
+                                  m_inputImageAspects,
+                                  m_samplerYcbcrConversion.GetSampler(),
+                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                  descrIndex,
+                                  dstBinding,
+                                  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                  imageDescriptors,
+                                  writeDescriptorSets,
+                                  maxNumComputeDescr / 2 /* max descriptors */);
+
+        // Output buffer handling
+        dstBinding = 4;
+        // OUT 4: Single buffer YCbCr
+        // OUT 5: Y plane buffer
+        // OUT 6: Cb, Cr or CbCr plane buffer
+        // OUT 7: Cr or Cb plane buffer
+        UpdateBufferDescriptorSets(outBuffers, numOutBuffers,
+                                   outBufferSubresourceLayouts, outBufferNumPlanes,
+                                   m_outputImageAspects,
+                                   descrIndex, dstBinding,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                   bufferDescriptors,
+                                   writeDescriptorSets,
+                                   maxNumComputeDescr);
+
+        assert(descrIndex <= maxNumComputeDescr);
+        assert(descrIndex >= 2);
+
+        if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+            m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                m_descriptorSetLayout.GetPipelineLayout(),
+                                                set, descrIndex, writeDescriptorSets.data());
+        } else {
+            VkDeviceOrHostAddressConstKHR 
descriptorBufferDeviceAddress = + m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx, + set, + descrIndex, + writeDescriptorSets.data()); + + // Descriptor buffer bindings + VkDescriptorBufferBindingInfoEXT bindingInfo{}; + bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + bindingInfo.pNext = nullptr; + bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress; + bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; + m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo); + + uint32_t bufferIndexImage = 0; + VkDeviceSize bufferOffset = 0; + m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, 1, &bufferIndexImage, &bufferOffset); + } + } + break; + + default: + m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + 0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0); + } + + struct ivec2 { + uint32_t width; + uint32_t height; + + ivec2() : width(0), height(0) {} + ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} + }; + + struct PushConstants { + uint32_t srcLayer; + uint32_t dstLayer; + ivec2 inputSize; + ivec2 outputSize; + uint32_t yOffset; // Y plane offset + uint32_t cbOffset; // Cb plane offset + uint32_t crOffset; // Cr plane offset + uint32_t yPitch; // Y plane pitch + uint32_t cbPitch; // Cb plane pitch + uint32_t crPitch; // Cr plane pitch + }; + + uint32_t width, height; + uint32_t rowPitch; + VkExtent3D inputExtent = inImageView->GetImageResource()->GetImageCreateInfo().extent; + + if (pBufferImageCopy) { + width = pBufferImageCopy->bufferRowLength > 0 ? + pBufferImageCopy->bufferRowLength : + pBufferImageCopy->imageExtent.width; + height = pBufferImageCopy->bufferImageHeight > 0 ? 
+                 pBufferImageCopy->bufferImageHeight :
+                 pBufferImageCopy->imageExtent.height;
+        rowPitch = width;
+    } else {
+        width = inputExtent.width;
+        height = inputExtent.height;
+        rowPitch = width;
+    }
+
+    VkDeviceSize planeSize = width * height;
+    VkDeviceSize yOffset = pBufferImageCopy ? pBufferImageCopy->bufferOffset : 0;
+    VkDeviceSize cbOffset = yOffset + planeSize;
+    VkDeviceSize crOffset = cbOffset + (planeSize / 4);
+
+    PushConstants pushConstants = {
+        inImageResourceInfo->baseArrayLayer,
+        0, // Destination layer (buffer has no layers)
+        ivec2(inputExtent.width, inputExtent.height),
+        ivec2(width, height),
+        static_cast<uint32_t>(yOffset),
+        static_cast<uint32_t>(cbOffset),
+        static_cast<uint32_t>(crOffset),
+        rowPitch,
+        rowPitch / 2, // For 4:2:0 format
+        rowPitch / 2  // For 4:2:0 format
+    };
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf,
+                                 m_descriptorSetLayout.GetPipelineLayout(),
+                                 VK_SHADER_STAGE_COMPUTE_BIT,
+                                 0,
+                                 sizeof(PushConstants),
+                                 &pushConstants);
+
+    const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
+
+    return VK_SUCCESS;
+}
+
+// Buffer input -> Buffer output (all buffer case)
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkBuffer* inBuffers,
+                                                     uint32_t numInBuffers,
+                                                     const VkFormat* inBufferFormats, // with size inBufferNumPlanes
+                                                     const VkSubresourceLayout* inBufferSubresourceLayouts,
+                                                     uint32_t numInPlanes,
+                                                     const VkExtent3D& inBufferExtent,
+                                                     const VkBuffer* outBuffers,
+                                                     uint32_t numOutBuffers,
+                                                     const VkFormat* outBufferFormats,
+                                                     const VkSubresourceLayout* outBufferSubresourceLayouts,
+                                                     uint32_t numOutPlanes,
+                                                     const VkExtent3D& outBufferExtent,
+                                                     uint32_t bufferIdx)
+{
+    assert(cmdBuf != VK_NULL_HANDLE);
+    assert(m_inputIsBuffer == true);
+    assert(m_outputIsBuffer == true);
+
+    
m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+    case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+    {
+        VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr]{};
+        std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+        uint32_t set = 0;
+        uint32_t descrIndex = 0;
+        uint32_t dstBinding = 0;
+
+        // Input buffer handling
+        // IN 0: Single buffer YCbCr
+        // IN 1: Y plane buffer
+        // IN 2: Cb, Cr or CbCr plane buffer
+        // IN 3: Cr plane buffer
+        UpdateBufferDescriptorSets(inBuffers, numInBuffers,
+                                   inBufferSubresourceLayouts, numInPlanes,
+                                   m_inputImageAspects,
+                                   descrIndex, dstBinding,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                   bufferDescriptors,
+                                   writeDescriptorSets,
+                                   maxNumComputeDescr / 2);
+
+        // Output buffer handling
+        dstBinding = 4;
+        // OUT 4: Single buffer YCbCr
+        // OUT 5: Y plane buffer
+        // OUT 6: Cb, Cr or CbCr plane buffer
+        // OUT 7: Cr or Cb plane buffer
+        UpdateBufferDescriptorSets(outBuffers, numOutBuffers,
+                                   outBufferSubresourceLayouts, numOutPlanes,
+                                   m_outputImageAspects,
+                                   descrIndex, dstBinding,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                   bufferDescriptors,
+                                   writeDescriptorSets,
+                                   maxNumComputeDescr);
+
+        assert(descrIndex <= maxNumComputeDescr);
+        assert(descrIndex >= 2);
+
+        if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+            m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                m_descriptorSetLayout.GetPipelineLayout(),
+                                                set, descrIndex, writeDescriptorSets.data());
+        } else {
+            VkDeviceOrHostAddressConstKHR descriptorBufferDeviceAddress =
+                m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx,
+                                                             set,
+                                                             descrIndex,
+                                                             writeDescriptorSets.data());
+
+            // Descriptor buffer bindings
+            VkDescriptorBufferBindingInfoEXT 
bindingInfo{}; + bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; + bindingInfo.pNext = nullptr; + bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress; + bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; + m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo); + + uint32_t bufferIndexImage = 0; + VkDeviceSize bufferOffset = 0; + m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + set, 1, &bufferIndexImage, &bufferOffset); + } + } + break; + + default: + m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, + m_descriptorSetLayout.GetPipelineLayout(), + 0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0); + } + + struct ivec2 { + uint32_t width; + uint32_t height; + + ivec2() : width(0), height(0) {} + ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} + }; + + struct PushConstants { + uint32_t srcLayer; // src image layer to use + uint32_t dstLayer; // dst image layer to use + ivec2 inputSize; // input image or buffer extent + ivec2 outputSize; // output image or buffer extent + uint32_t inYOffset; // input buffer Y plane offset + uint32_t inCbOffset; // input buffer Cb plane offset + uint32_t inCrOffset; // input buffer Cr plane offset + uint32_t inYPitch; // input buffer Y plane pitch + uint32_t inCbPitch; // input buffer Cb plane pitch + uint32_t inCrPitch; // input buffer Cr plane pitch + uint32_t outYOffset; // output buffer Y plane offset + uint32_t outCbOffset; // output buffer Cb plane offset + uint32_t outCrOffset; // output buffer Cr plane offset + uint32_t outYPitch; // output buffer Y plane pitch + uint32_t outCbPitch; // output buffer Cb plane pitch + uint32_t outCrPitch; // output buffer Cr plane pitch + }; + + // Calculate buffer parameters + uint32_t rowPitch = inBufferExtent.width; + VkDeviceSize 
planeSize = inBufferExtent.width * inBufferExtent.height; + VkDeviceSize yOffset = 0; + VkDeviceSize cbOffset = planeSize; + VkDeviceSize crOffset = cbOffset + (planeSize / 4); + + PushConstants pushConstants = { + 0, // Source layer (buffer has no layers) + 0, // Destination layer (buffer has no layers) + ivec2(inBufferExtent.width, inBufferExtent.height), + ivec2(outBufferExtent.width, outBufferExtent.height), + static_cast(yOffset), + static_cast(cbOffset), + static_cast(crOffset), + rowPitch, + rowPitch / 2, // For 4:2:0 format + rowPitch / 2 // For 4:2:0 format + }; + + m_vkDevCtx->CmdPushConstants(cmdBuf, + m_descriptorSetLayout.GetPipelineLayout(), + VK_SHADER_STAGE_COMPUTE_BIT, + 0, + sizeof(PushConstants), + &pushConstants); + + const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; + const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY; + m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1); + + return VK_SUCCESS; +} diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h index ef8db51a..ab9a8845 100644 --- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h +++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h @@ -32,6 +32,15 @@ class VulkanFilterYuvCompute : public VulkanFilter public: enum FilterType { YCBCRCOPY, YCBCRCLEAR, YCBCR2RGBA, RGBA2YCBCR }; + static constexpr uint32_t maxNumComputeDescr = 8; + + static constexpr VkImageAspectFlags validPlaneAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | + VK_IMAGE_ASPECT_PLANE_2_BIT; + + static constexpr VkImageAspectFlags validAspects = VK_IMAGE_ASPECT_COLOR_BIT | validPlaneAspects; + + static uint32_t GetPlaneIndex(VkImageAspectFlagBits planeAspect); static VkResult Create(const VulkanDeviceContext* vkDevCtx, uint32_t queueFamilyIndex, @@ -40,6 +49,8 @@ class VulkanFilterYuvCompute : public 
VulkanFilter uint32_t maxNumFrames, VkFormat inputFormat, VkFormat outputFormat, + bool inputEnableMsbToLsbShift, + bool outputEnableLsbToMsbShift, const VkSamplerYcbcrConversionCreateInfo* pYcbcrConversionCreateInfo, const YcbcrPrimariesConstants* pYcbcrPrimariesConstants, const VkSamplerCreateInfo* pSamplerCreateInfo, @@ -52,6 +63,8 @@ class VulkanFilterYuvCompute : public VulkanFilter uint32_t maxNumFrames, VkFormat inputFormat, VkFormat outputFormat, + bool inputEnableMsbToLsbShift, + bool outputEnableLsbToMsbShift, const YcbcrPrimariesConstants* pYcbcrPrimariesConstants) : VulkanFilter(vkDevCtx, queueFamilyIndex, queueIndex) , m_filterType(filterType) @@ -71,7 +84,11 @@ class VulkanFilterYuvCompute : public VulkanFilter VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) + , m_inputEnableMsbToLsbShift(inputEnableMsbToLsbShift) + , m_outputEnableLsbToMsbShift(outputEnableLsbToMsbShift) , m_enableRowAndColumnReplication(true) + , m_inputIsBuffer(false) + , m_outputIsBuffer(false) { // FIXME: m_ycbcrPrimariesConstants is currently unused but is kept for future use. 
(void)m_ycbcrPrimariesConstants; @@ -116,263 +133,205 @@ class VulkanFilterYuvCompute : public VulkanFilter assert(m_vkDevCtx != nullptr); } + uint32_t UpdateBufferDescriptorSets(const VkBuffer* vkBuffers, + uint32_t numVkBuffers, + const VkSubresourceLayout* vkBufferSubresourceLayout, + uint32_t numPlanes, + VkImageAspectFlags validImageAspects, + uint32_t& descrIndex, + uint32_t& baseBinding, + VkDescriptorType descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr], + std::array& writeDescriptorSets, + const uint32_t maxDescriptors = maxNumComputeDescr); + + uint32_t UpdateImageDescriptorSets(const VkImageResourceView* inputImageView, + VkImageAspectFlags validImageAspects, + VkSampler convSampler, + VkImageLayout imageLayout, + uint32_t& descrIndex, + uint32_t& baseBinding, + VkDescriptorType descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr], + std::array& writeDescriptorSets, + const uint32_t maxDescriptors = maxNumComputeDescr); + + // Image input -> Image output virtual VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf, const VkImageResourceView* inputImageView, const VkVideoPictureResourceInfoKHR * inputImageResourceInfo, const VkImageResourceView* outputImageView, const VkVideoPictureResourceInfoKHR * outputImageResourceInfo, - uint32_t bufferIdx) - { - - assert(cmdBuf != VK_NULL_HANDLE); - - m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline()); - - VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode(); - - switch (layoutMode) { - case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR: - case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT: - { - - const uint32_t maxNumComputeDescr = 8; - VkDescriptorImageInfo imageDescriptors[8]{}; - std::array writeDescriptorSets{}; - - // Images - uint32_t 
set = 0; - uint32_t descrIndex = 0; - uint32_t dstBinding = 0; - // RGBA color converted by an YCbCr sample - if (m_inputImageAspects & VK_IMAGE_ASPECT_COLOR_BIT) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = (m_samplerYcbcrConversion.GetSampler() != VK_NULL_HANDLE) ? - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER : - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - - imageDescriptors[descrIndex].sampler = m_samplerYcbcrConversion.GetSampler(); - imageDescriptors[descrIndex].imageView = inputImageView->GetImageView(); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // RGBA or Sampled YCbCr - descrIndex++; - } - dstBinding++; - - uint32_t planeNum = 0; - // y plane - G -> R8 - if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < inputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // Y (0) plane - descrIndex++; - } - dstBinding++; - - // CbCr plane - 
BR -> R8B8 - if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < inputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // CbCr (1) plane - descrIndex++; - } - dstBinding++; - - // Cr plane - R -> R8 - if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < inputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // CbCr (1) plane - descrIndex++; - } - dstBinding++; - - // Out RGBA or single planar YCbCr image - if (m_outputImageAspects & VK_IMAGE_ASPECT_COLOR_BIT) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - 
writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = outputImageView->GetImageView(); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; - descrIndex++; - } - dstBinding++; - - planeNum = 0; - // y plane out - G -> R8 - if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < outputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; - descrIndex++; - } - dstBinding++; - - // CbCr plane out - BR -> R8B8 - if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < outputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - 
imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; - descrIndex++; - } - dstBinding++; - - // Cr plane out - R -> R8 - if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) && - (planeNum < outputImageView->GetNumberOfPlanes())) { - writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE; - writeDescriptorSets[descrIndex].dstBinding = dstBinding; - writeDescriptorSets[descrIndex].descriptorCount = 1; - writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE; - imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++); - assert(imageDescriptors[descrIndex].imageView); - imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; - descrIndex++; - } - dstBinding++; - - assert(descrIndex <= maxNumComputeDescr); - assert(descrIndex >= 2); - - if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) { - m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, - m_descriptorSetLayout.GetPipelineLayout(), - set, descrIndex, writeDescriptorSets.data()); - } else { - - VkDeviceOrHostAddressConstKHR imageDescriptorBufferDeviceAddress = - m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx, - set, - descrIndex, - writeDescriptorSets.data()); - - - // Descriptor buffer bindings - // Set 0 = Image - VkDescriptorBufferBindingInfoEXT bindingInfo{}; - bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT; - bindingInfo.pNext = nullptr; - 
bindingInfo.address = imageDescriptorBufferDeviceAddress.deviceAddress; - bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT; - m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo); - - // Image (set 0) - uint32_t bufferIndexImage = 0; - VkDeviceSize bufferOffset = 0; - m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, - m_descriptorSetLayout.GetPipelineLayout(), - set, 1, &bufferIndexImage, &bufferOffset); - } - } - break; - - default: - m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, - m_descriptorSetLayout.GetPipelineLayout(), - 0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0); - } - - struct ivec2 { - uint32_t width; - uint32_t height; - - ivec2() : width(0), height(0) {} - ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} - }; - - struct PushConstants { - uint32_t srcLayer; - uint32_t dstLayer; - ivec2 inputSize; // Original input image size (width, height) - ivec2 outputSize; // Output image size (width, height, with padding) - }; - - PushConstants pushConstants = { - inputImageResourceInfo->baseArrayLayer, // Set the source layer index - outputImageResourceInfo->baseArrayLayer, // Set the destination layer index - ivec2(inputImageResourceInfo->codedExtent.width, inputImageResourceInfo->codedExtent.height), - ivec2(outputImageResourceInfo->codedExtent.width, outputImageResourceInfo->codedExtent.height) - }; - - m_vkDevCtx->CmdPushConstants(cmdBuf, - m_descriptorSetLayout.GetPipelineLayout(), - VK_SHADER_STAGE_COMPUTE_BIT, - 0, // offset - sizeof(PushConstants), - &pushConstants); - - const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; - const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY; - m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1); - - return 
VK_SUCCESS; - } + uint32_t bufferIdx) override; + // Buffer input -> Image output + VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf, + const VkBuffer* inBuffers, // with size numInBuffers + uint32_t numInBuffers, + const VkFormat* inBufferFormats, // with size inBufferNumPlanes + const VkSubresourceLayout* inBufferSubresourceLayouts, // with size inBufferNumPlanes + uint32_t inBufferNumPlanes, + const VkImageResourceView* outImageView, + const VkVideoPictureResourceInfoKHR* outImageResourceInfo, + const VkBufferImageCopy* pBufferImageCopy, + uint32_t bufferIdx); + + // Image input -> Buffer output + VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf, + const VkImageResourceView* inImageView, + const VkVideoPictureResourceInfoKHR* inImageResourceInfo, + const VkBuffer* outBuffers, // with size numOutBuffers + uint32_t numOutBuffers, + const VkFormat* inBufferFormats, // with size outBufferNumPlanes + const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes + uint32_t outBufferNumPlanes, + const VkBufferImageCopy* pBufferImageCopy, + uint32_t bufferIdx); + + // Buffer input -> Buffer output + VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf, + const VkBuffer* inBuffers, // with size numInBuffers + uint32_t numInBuffers, + const VkFormat* inBufferFormats, // with size inBufferNumPlanes + const VkSubresourceLayout* inBufferSubresourceLayouts, // with size inBufferNumPlanes + uint32_t inBufferNumPlanes, + const VkExtent3D& inBufferExtent, + const VkBuffer* outBuffers, // with size numOutBuffers + uint32_t numOutBuffers, + const VkFormat* outBufferFormats, // with size outBufferNumPlanes + const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes + uint32_t outBufferNumPlanes, + const VkExtent3D& outBufferExtent, + uint32_t bufferIdx); private: VkResult InitDescriptorSetLayout(uint32_t maxNumFrames); - void ShaderGeneratePlaneDescriptors(std::stringstream& computeShader, - VkImageAspectFlags& 
imageAspects, - const char *imageName, - VkFormat imageFormat, - bool isInput, - uint32_t startBinding = 0, - uint32_t set = 0, - bool imageArray = true); + + /** + * @brief Generates GLSL image descriptor bindings for shader input/output + * + * Creates appropriate GLSL image binding declarations based on the input/output format. + * Handles different YUV formats like single-plane (RGBA), 2-plane (NV12/NV21), and 3-plane (I420, etc). + * + * @param computeShader Output stringstream for shader code + * @param imageAspects Output parameter to store the image aspect flags used + * @param imageName Base image variable name + * @param imageFormat Vulkan format of the image + * @param isInput Whether this is an input or output resource + * @param startBinding Starting binding number in the descriptor set + * @param set Descriptor set number + * @param imageArray Whether to use image2DArray or image2D + * @return The next available binding number after all descriptors are created + */ + uint32_t ShaderGenerateImagePlaneDescriptors(std::stringstream& computeShader, + VkImageAspectFlags& imageAspects, + const char *imageName, + VkFormat imageFormat, + bool isInput, + uint32_t startBinding = 0, + uint32_t set = 0, + bool imageArray = true); + + /** + * @brief Generates GLSL buffer descriptor bindings for shader input/output + * + * Creates appropriate GLSL buffer binding declarations based on the input/output format. + * Handles different YUV buffer layouts matching single-plane, 2-plane, or 3-plane formats. 
+ * + * @param shaderStr Output stringstream for shader code + * @param imageAspects Output parameter to store the image aspect flags used + * @param bufferName Base buffer variable name + * @param bufferFormat Vulkan format of the buffer data + * @param isInput Whether this is an input or output resource + * @param startBinding Starting binding number in the descriptor set + * @param set Descriptor set number + * @param bufferType The Vulkan descriptor type to use for the buffer + * @return The next available binding number after all descriptors are created + */ + uint32_t ShaderGenerateBufferPlaneDescriptors(std::stringstream& shaderStr, + VkImageAspectFlags& imageAspects, + const char *bufferName, + VkFormat bufferFormat, + bool isInput, + uint32_t startBinding = 0, + uint32_t set = 0, + VkDescriptorType bufferType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + + /** + * @brief Unified descriptor generation for either buffer or image resources + * + * Delegates to either ShaderGenerateImagePlaneDescriptors or ShaderGenerateBufferPlaneDescriptors + * based on the resource type (image or buffer) needed for input/output. + * + * @param shaderStr Output stringstream for shader code + * @param isInput Whether this is an input or output resource + * @param startBinding Starting binding number in the descriptor set + * @param set Descriptor set number + * @param imageArray Whether to use image2DArray or image2D (for image resources) + * @param bufferType The Vulkan descriptor type to use for buffer resources + * @return The next available binding number after all descriptors are created + */ + uint32_t ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr, + bool isInput, + uint32_t startBinding, + uint32_t set, + bool imageArray, + VkDescriptorType bufferType); + + /** + * @brief Initializes GLSL shader for YCbCr copy operation + * + * Generates a compute shader that copies YCbCr data from input to output + * without any color space conversion, preserving the format. 
+ * + * @param computeShader Output string for the complete GLSL shader code + * @return Size of the generated shader code in bytes + */ size_t InitYCBCRCOPY(std::string& computeShader); + + /** + * @brief Initializes GLSL shader for YCbCr clear operation + * + * Generates a compute shader that clears/fills YCbCr data in the output + * resource with constant values. + * + * @param computeShader Output string for the complete GLSL shader code + * @return Size of the generated shader code in bytes + */ size_t InitYCBCRCLEAR(std::string& computeShader); + + /** + * @brief Initializes GLSL shader for YCbCr to RGBA conversion + * + * Generates a compute shader that converts YCbCr input to RGBA output + * using the appropriate color space conversion matrix. + * + * @param computeShader Output string for the complete GLSL shader code + * @return Size of the generated shader code in bytes + */ size_t InitYCBCR2RGBA(std::string& computeShader); + /** + * @brief Initializes GLSL shader for RGBA to YCbCr conversion + * + * Generates a compute shader that converts RGBA input to YCbCr output + * using the appropriate color space conversion matrix. + * + * @param computeShader Output string for the complete GLSL shader code + * @return Size of the generated shader code in bytes + */ + size_t InitRGBA2YCBCR(std::string& computeShader); + + /** + * @brief Initializes GLSL shader for YUV to NV12 conversion using buffer input + * + * Generates a compute shader that converts YUV input from buffer to NV12 output, + * handling different YUV formats (I420, I422, I444) with appropriate chroma subsampling. 
+ * + * @param computeShader Output string for the complete GLSL shader code + * @return Size of the generated shader code in bytes + */ + size_t InitYUV2NV12FromBuffer(std::string& computeShader); + private: const FilterType m_filterType; VkFormat m_inputFormat; @@ -386,8 +345,32 @@ class VulkanFilterYuvCompute : public VulkanFilter VulkanComputePipeline m_computePipeline; VkImageAspectFlags m_inputImageAspects; VkImageAspectFlags m_outputImageAspects; + uint32_t m_inputEnableMsbToLsbShift : 1; + uint32_t m_outputEnableLsbToMsbShift : 1; uint32_t m_enableRowAndColumnReplication : 1; - + uint32_t m_inputIsBuffer : 1; + uint32_t m_outputIsBuffer : 1; + + struct PushConstants { + uint32_t srcLayer; // src image layer to use + uint32_t dstLayer; // dst image layer to use + uint32_t inputWidth; // input image or buffer width + uint32_t inputHeight; // input image or buffer height + uint32_t outputWidth; // output image or buffer width + uint32_t outputHeight; // output image or buffer height + uint32_t inYOffset; // input buffer Y plane offset + uint32_t inCbOffset; // input buffer Cb plane offset + uint32_t inCrOffset; // input buffer Cr plane offset + uint32_t inYPitch; // input buffer Y plane pitch + uint32_t inCbPitch; // input buffer Cb plane pitch + uint32_t inCrPitch; // input buffer Cr plane pitch + uint32_t outYOffset; // output buffer Y plane offset + uint32_t outCbOffset; // output buffer Cb plane offset + uint32_t outCrOffset; // output buffer Cr plane offset + uint32_t outYPitch; // output buffer Y plane pitch + uint32_t outCbPitch; // output buffer Cb plane pitch + uint32_t outCrPitch; // output buffer Cr plane pitch + }; }; #endif /* _VULKANFILTERYUVCOMPUTE_H_ */ diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp index 2b9f6b66..c855386a 100644 --- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp +++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp @@ -334,16 
+334,18 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo if (needNewFilter) { result = VulkanFilterYuvCompute::Create(m_vkDevCtx, - m_vkDevCtx->GetComputeQueueFamilyIdx(), - 0, - m_filterType, - numDecodeSurfaces + 1, - inputFormat, - outputFormat, - &ycbcrConversionCreateInfo, - &ycbcrPrimariesConstants, - &samplerInfo, - m_yuvFilter); + m_vkDevCtx->GetComputeQueueFamilyIdx(), + 0, + m_filterType, + numDecodeSurfaces + 1, + inputFormat, + outputFormat, + false, // inputEnableMsbToLsbShift + false, // outputEnableLsbToMsbShift + &ycbcrConversionCreateInfo, + &ycbcrPrimariesConstants, + &samplerInfo, + m_yuvFilter); } if (result == VK_SUCCESS) { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 8649df07..84e83dfa 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -164,96 +164,22 @@ VkResult VkVideoEncoder::LoadNextFrame(VkSharedBaseObj& const uint8_t* pInputFrameData = m_encoderConfig->inputFileHandler.GetMappedPtr(m_encoderConfig->input.fullImageSize, encodeFrameInfo->frameInputOrderNum); + // NOTE: Get image layout const VkSubresourceLayout* dstSubresourceLayout = dstImageResource->GetSubresourceLayout(); - int yCbCrConvResult = 0; - if (m_encoderConfig->input.bpp == 8) { - - if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) { - // Load current 8-bit frame from file and convert to 2-plane YUV444 - yCbCrConvResult = YCbCrConvUtilsCpu::I444ToP444( - pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset, // src_y - (int)m_encoderConfig->input.planeLayouts[0].rowPitch, // src_stride_y - pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset, // src_u - (int)m_encoderConfig->input.planeLayouts[1].rowPitch, // src_stride_u - pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset, // src_v - 
(int)m_encoderConfig->input.planeLayouts[2].rowPitch, // src_stride_v - writeImagePtr + dstSubresourceLayout[0].offset, // dst_y - (int)dstSubresourceLayout[0].rowPitch, // dst_stride_y - writeImagePtr + dstSubresourceLayout[1].offset, // dst_uv - (int)dstSubresourceLayout[1].rowPitch, // dst_stride_uv - std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // width - std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height)); // height - } else { - // Load current 8-bit frame from file and convert to NV12 - yCbCrConvResult = YCbCrConvUtilsCpu::I420ToNV12( - pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset, // src_y, - (int)m_encoderConfig->input.planeLayouts[0].rowPitch, // src_stride_y, - pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset, // src_u, - (int)m_encoderConfig->input.planeLayouts[1].rowPitch, // src_stride_u, - pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset, // src_v, - (int)m_encoderConfig->input.planeLayouts[2].rowPitch, // src_stride_v, - writeImagePtr + dstSubresourceLayout[0].offset, // dst_y, - (int)dstSubresourceLayout[0].rowPitch, // dst_stride_y, - writeImagePtr + dstSubresourceLayout[1].offset, // dst_uv, - (int)dstSubresourceLayout[1].rowPitch, // dst_stride_uv, - std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // width - std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height)); // height - } - - } else if (m_encoderConfig->input.bpp == 10) { // 10-bit - actually 16-bit only for now. 
- - int shiftBits = 0; - if (m_encoderConfig->input.msbShift >= 0) { - shiftBits = m_encoderConfig->input.msbShift; - } else { - shiftBits = 16 - m_encoderConfig->input.bpp; - } - - if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) { - // Load current 10-bit frame from file and convert to 2-plane YUV444 - yCbCrConvResult = YCbCrConvUtilsCpu::I444ToP444( - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), // src_y - (int)m_encoderConfig->input.planeLayouts[0].rowPitch, // src_stride_y - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), // src_u - (int)m_encoderConfig->input.planeLayouts[1].rowPitch, // src_stride_u - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), // src_v - (int)m_encoderConfig->input.planeLayouts[2].rowPitch, // src_stride_v - (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset), // dst_y - (int)dstSubresourceLayout[0].rowPitch, // dst_stride_y - (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset), // dst_uv - (int)dstSubresourceLayout[1].rowPitch, // dst_stride_uv - std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // width - std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height), // height - shiftBits); - } else { - // Load current 10-bit frame from file and convert to P010/P016 - yCbCrConvResult = YCbCrConvUtilsCpu::I420ToNV12( - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), // src_y, - (int)m_encoderConfig->input.planeLayouts[0].rowPitch, // src_stride_y, - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), // src_u, - (int)m_encoderConfig->input.planeLayouts[1].rowPitch, // src_stride_u, - (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), // src_v, - (int)m_encoderConfig->input.planeLayouts[2].rowPitch, // src_stride_v, - (uint16_t*)(writeImagePtr + 
dstSubresourceLayout[0].offset), // dst_y, - (int)dstSubresourceLayout[0].rowPitch, // dst_stride_y, - (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset), // dst_uv, - (int)dstSubresourceLayout[1].rowPitch, // dst_stride_uv, - std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // width - std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height), // height - shiftBits); - } - - } else { - assert(!"Requested bit-depth is not supported!"); - } - - if (yCbCrConvResult == 0) { - // On success, stage the input frame for the encoder video input - return StageInputFrame(encodeFrameInfo); - } - - return VK_ERROR_INITIALIZATION_FAILED; + // Direct plane copy - no color space conversion needed + CopyYCbCrPlanesDirectCPU( + pInputFrameData, // Source buffer + m_encoderConfig->input.planeLayouts, // Source layouts + writeImagePtr, // Destination buffer + dstSubresourceLayout, // Destination layouts + std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // Width + std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height), // Height + m_encoderConfig->input.numPlanes, // Number of planes + m_encoderConfig->input.vkFormat); // Format for subsampling detection + + // Now stage the input frame for the encoder video input + return StageInputFrame(encodeFrameInfo); } VkResult VkVideoEncoder::StageInputFrameQpMap(VkSharedBaseObj& encodeFrameInfo, @@ -479,6 +405,121 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObjplanesLayout) : 8; // Default to 8-bit + const uint32_t bytesPerPixel = (bitDepth > 8) ? 2 : 1; + + // Determine chroma subsampling ratios + const uint32_t chromaHorzRatio = (formatInfo != nullptr) ? (1 << formatInfo->planesLayout.secondaryPlaneSubsampledX) : 1; + const uint32_t chromaVertRatio = (formatInfo != nullptr) ? 
(1 << formatInfo->planesLayout.secondaryPlaneSubsampledY) : 1; + + // Log the format subsampling for debugging + if (m_encoderConfig->verbose) { + const char* subsamplingDesc = "4:4:4"; + if (chromaHorzRatio == 2 && chromaVertRatio == 2) { + subsamplingDesc = "4:2:0"; + } else if (chromaHorzRatio == 2 && chromaVertRatio == 1) { + subsamplingDesc = "4:2:2"; + } + printf("YCbCr copy with %s subsampling (chromaHorzRatio=%d, chromaVertRatio=%d), %d-bit\n", + subsamplingDesc, chromaHorzRatio, chromaVertRatio, bitDepth); + } + + // Handle all planes + for (uint32_t plane = 0; plane < numPlanes; plane++) { + // Source and destination plane pointers + const uint8_t* srcPlane = pInputFrameData + inputPlaneLayouts[plane].offset; + uint8_t* dstPlane = writeImagePtr + dstSubresourceLayout[plane].offset; + + // Get plane dimensions - adjust for chroma planes + uint32_t planeWidth = width; + uint32_t planeHeight = height; + + // Adjust dimensions for chroma planes based on format subsampling + if (plane > 0) { + if (chromaHorzRatio > 1) { + planeWidth = (width + chromaHorzRatio - 1) / chromaHorzRatio; + } + if (chromaVertRatio > 1) { + planeHeight = (height + chromaVertRatio - 1) / chromaVertRatio; + } + } + + // Source and destination strides + const size_t srcStride = inputPlaneLayouts[plane].rowPitch; + const size_t dstStride = dstSubresourceLayout[plane].rowPitch; + + // Line width in bytes + const size_t lineBytes = planeWidth * bytesPerPixel; + + // Get the starting pointers for this plane + const uint8_t* srcRow = srcPlane; + uint8_t* dstRow = dstPlane; + + if (false && (bitDepth > 8)) { + + const int shiftBits = 16 - bitDepth; + + // Copy each line, incrementing pointers by stride amounts + for (uint32_t y = 0; y < planeHeight; y++) { + + // Get the starting pointers for this row + const uint16_t* srcRow16 = (const uint16_t*)srcRow; + uint16_t* dstRow16 = (uint16_t*)dstRow; + + for (uint32_t i = 0; i < planeWidth; i++) { + *dstRow16++ = (*srcRow16++ << shiftBits); + } + 
+ // Advance to the next line using pointer arithmetic + srcRow += srcStride; + dstRow += dstStride; + } + + } else { + + // Copy each line, incrementing pointers by stride amounts + for (uint32_t y = 0; y < planeHeight; y++) { + // Copy the current line + memcpy(dstRow, srcRow, lineBytes); + + // Advance to the next line using pointer arithmetic + srcRow += srcStride; + dstRow += dstStride; + } + } + } +} VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObj& encodeFrameInfo) { @@ -943,6 +984,7 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj& encoderConf VK_IMAGE_USAGE_TRANSFER_DST_BIT); const VkImageUsageFlags dpbImageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; + // NOTE: Create linearInputImage result = VulkanVideoImagePool::Create(m_vkDevCtx, m_linearInputImagePool); if(result != VK_SUCCESS) { fprintf(stderr, "\nInitEncoder Error: Failed to create linearInputImagePool.\n"); @@ -956,7 +998,7 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj& encoderConf result = m_linearInputImagePool->Configure( m_vkDevCtx, encoderConfig->numInputImages, - m_imageInFormat, + encoderConfig->input.vkFormat, linearInputImageExtent, ( VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -1217,8 +1259,10 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj& encoderConf 0, // queueIndex encoderConfig->filterType, encoderConfig->numInputImages, - m_imageInFormat, // in filter format (can be RGB) + encoderConfig->input.vkFormat, // in filter format (can be RGB) m_imageInFormat, // out filter - same as input for now. 
+ false, // inputEnableMsbToLsbShift + (encoderConfig->input.msbShift > 0), &ycbcrConversionCreateInfo, &ycbcrPrimariesConstants, &samplerInfo, diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h index 61c2ec84..c939bda6 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h @@ -559,6 +559,29 @@ class VkVideoEncoder : public VkVideoRefCountBase { const uint8_t* setPlaneOffset(const uint8_t* pFrameData, size_t bufferSize, size_t ¤tReadOffset); + /** + * @brief Copies YCbCr planes directly from input buffer to output buffer when formats are the same + * + * @param pInputFrameData Source buffer containing YCbCr planes + * @param inputPlaneLayouts Array of source buffer plane layouts (offset, pitch, etc.) + * @param writeImagePtr Destination buffer for the YCbCr planes + * @param dstSubresourceLayout Array of destination buffer plane layouts + * @param width Width of the image in pixels + * @param height Height of the image in pixels + * @param numPlanes Number of planes in the format (1, 2, or 3) + * @param format The VkFormat of the image for proper subsampling and bit depth detection + * @return none + */ + void CopyYCbCrPlanesDirectCPU( + const uint8_t* pInputFrameData, + const VkSubresourceLayout* inputPlaneLayouts, + uint8_t* writeImagePtr, + const VkSubresourceLayout* dstSubresourceLayout, + uint32_t width, + uint32_t height, + uint32_t numPlanes, + VkFormat format); + bool WaitForThreadsToComplete(); protected: From f5659698c1b251242d6eb9cdcae7d9f67777eaf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Tue, 3 Jun 2025 16:38:13 +0200 Subject: [PATCH 03/14] encoder: allow to build without shaderc dep 2 Fixup of 62138ad --- vk_video_encoder/demos/vk-video-enc/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt 
b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt index d3bba268..7fe88cc3 100644 --- a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt +++ b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt @@ -118,6 +118,10 @@ if(TARGET vulkan) list(APPEND definitions PRIVATE -DUNINSTALLED_LOADER="$") endif() +if(USE_SHADERC) + list(APPEND definitions PRIVATE -DSHADERC_SUPPORT) +endif() + if(WIN32) list(APPEND definitions PRIVATE -DVK_USE_PLATFORM_WIN32_KHR) list(APPEND definitions PRIVATE -DWIN32_LEAN_AND_MEAN) From ee5acf0da5e6c1a96e23a8e60a07e3a2eb2e77e9 Mon Sep 17 00:00:00 2001 From: "Vassili Nikolaev (NVIDIA)" Date: Mon, 9 Jun 2025 08:59:16 -0500 Subject: [PATCH 04/14] common: Code compilation fixes --- .../VkVideoCore/VulkanVideoCapabilities.h | 2 +- common/include/mio/mio.hpp | 8 ++--- common/libs/VkCodecUtils/VkThreadPool.h | 5 ++- .../libs/VkCodecUtils/VkVideoFrameToFile.cpp | 36 ++++++++++++------- .../VkCodecUtils/VulkanFilterYuvCompute.cpp | 2 +- .../VkCodecUtils/VulkanShaderCompiler.cpp | 13 +++++-- .../VkCodecUtils/VulkanVideoProcessor.cpp | 25 ++++++------- .../libs/VkCodecUtils/VulkanVideoProcessor.h | 4 +++ common/libs/VkShell/Shell.h | 3 ++ .../NvVideoParser/src/VulkanAV1Decoder.cpp | 10 ++++-- .../src/VulkanAV1GlobalMotionDec.cpp | 2 +- vk_video_decoder/src/vulkan_video_decoder.cpp | 9 ++--- vk_video_encoder/demos/vk-video-enc/Main.cpp | 2 +- .../include/vulkan_video_encoder.h | 4 +-- .../libs/VkVideoEncoder/VkEncoderConfig.cpp | 10 +++--- .../libs/VkVideoEncoder/VkEncoderConfig.h | 6 ++-- .../VkVideoEncoder/VkEncoderConfigAV1.cpp | 2 +- .../libs/VkVideoEncoder/VkEncoderConfigAV1.h | 2 +- .../libs/VkVideoEncoder/VkEncoderDpbH264.h | 2 +- .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 8 +++-- .../libs/VkVideoEncoder/VkVideoGopStructure.h | 7 ++-- vk_video_encoder/src/vulkan_video_encoder.cpp | 6 ++-- .../test/vulkan-video-enc/Main.cpp | 2 +- 23 files changed, 104 insertions(+), 66 deletions(-) diff --git 
a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h index 8e0caf4f..3c8f572f 100644 --- a/common/include/VkVideoCore/VulkanVideoCapabilities.h +++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h @@ -360,7 +360,7 @@ class VulkanVideoCapabilities } } - formatCount = std::min(supportedFormatCount, formatCount); + formatCount = std::min(supportedFormatCount, formatCount); for (uint32_t i = 0; i < formatCount; i++) { formats[i] = pSupportedFormats[i].format; diff --git a/common/include/mio/mio.hpp b/common/include/mio/mio.hpp index 5cd55ea8..3c3e1adb 100644 --- a/common/include/mio/mio.hpp +++ b/common/include/mio/mio.hpp @@ -786,13 +786,13 @@ namespace win { /** Returns the 4 upper bytes of an 8-byte integer. */ inline DWORD int64_high(int64_t n) noexcept { - return n >> 32; + return (DWORD)(n >> 32); } /** Returns the 4 lower bytes of an 8-byte integer. */ inline DWORD int64_low(int64_t n) noexcept { - return n & 0xffffffff; + return (DWORD)(n & 0xffffffff); } inline std::wstring s_2_ws(const std::string& s) @@ -887,7 +887,7 @@ inline size_t query_file_size(file_handle_type handle, std::error_code& error) error = detail::last_error(); return 0; } - return static_cast(file_size.QuadPart); + return static_cast(file_size.QuadPart); #else // POSIX struct stat sbuf; if(::fstat(handle, &sbuf) == -1) @@ -933,7 +933,7 @@ inline mmap_context memory_map(const file_handle_type file_handle, const int64_t mode == access_mode::read ? FILE_MAP_READ : FILE_MAP_WRITE, win::int64_high(aligned_offset), win::int64_low(aligned_offset), - length_to_map)); + (size_t)length_to_map)); if(mapping_start == nullptr) { // Close file handle if mapping it failed. 
diff --git a/common/libs/VkCodecUtils/VkThreadPool.h b/common/libs/VkCodecUtils/VkThreadPool.h index 44d31bd1..b9d5a508 100644 --- a/common/libs/VkCodecUtils/VkThreadPool.h +++ b/common/libs/VkCodecUtils/VkThreadPool.h @@ -65,8 +65,11 @@ class VkThreadPool std::future res = task->get_future(); { std::unique_lock lock(queue_mutex); - if(stop) + if(stop) { +#ifdef __cpp_exceptions throw std::runtime_error("enqueue on stopped ThreadPool"); +#endif + } tasks.emplace([task](){ (*task)(); }); } diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 846a0890..c5ca2d0e 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -240,7 +240,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } if (m_outputcrcPerFrame && m_crcOutputFile) { - fprintf(m_crcOutputFile, "CRC Frame[%" PRId64 "]:", pFrame->displayOrder); + fprintf(m_crcOutputFile, "CRC Frame[%lld]:", (long long)pFrame->displayOrder); for (size_t i = 0; i < m_crcInitValue.size(); i += 1) { uint32_t frameCrc = m_crcInitValue[i]; getCRC(&frameCrc, pOutputBuffer, usedBufferSize, Crc32Table); @@ -415,6 +415,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { VkDeviceSize maxSize = 0; const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize); assert(readImagePtr != nullptr); + assert(maxSize <= SIZE_MAX); // Ensure we don't lose data in conversion int32_t secondaryPlaneWidth = frameWidth; int32_t secondaryPlaneHeight = frameHeight; @@ -490,15 +491,19 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { // Copy the luma plane const uint32_t numCompatiblePlanes = 1; for (uint32_t plane = 0; plane < numCompatiblePlanes; plane++) { - const uint8_t* pSrc = readImagePtr + layouts[plane].offset; - uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset; + const uint8_t* pSrc = readImagePtr + static_cast(layouts[plane].offset); + uint8_t* 
pDst = pOutBuffer + static_cast(yuvPlaneLayouts[plane].offset); if (is8Bit) { - CopyPlaneData(pSrc, pDst, layouts[plane].rowPitch, yuvPlaneLayouts[plane].rowPitch, + assert(layouts[plane].rowPitch <= SIZE_MAX); + assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); + CopyPlaneData(pSrc, pDst, static_cast(layouts[plane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), frameWidth, imageHeight); } else { - CopyPlaneData(pSrc, pDst, layouts[plane].rowPitch, yuvPlaneLayouts[plane].rowPitch, - frameWidth, imageHeight, 1, bitShift); + assert(layouts[plane].rowPitch <= SIZE_MAX); + assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); + CopyPlaneData(pSrc, pDst, static_cast(layouts[plane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), + frameWidth, imageHeight); } } @@ -517,21 +522,25 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } if (is8Bit) { - CopyPlaneData(pSrc, pDst, layouts[srcPlane].rowPitch, yuvPlaneLayouts[plane].rowPitch, + assert(layouts[srcPlane].rowPitch <= SIZE_MAX); + assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); + CopyPlaneData(pSrc, pDst, static_cast(layouts[srcPlane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), planeWidth, 1, 2); } else { - CopyPlaneData(pSrc, pDst, layouts[srcPlane].rowPitch, yuvPlaneLayouts[plane].rowPitch, - planeWidth, 1, 2, bitShift); + assert(layouts[srcPlane].rowPitch <= SIZE_MAX); + assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); + CopyPlaneData(pSrc, pDst, static_cast(layouts[srcPlane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), + planeWidth, 1, 2); } pDst += yuvPlaneLayouts[plane].rowPitch; } } // Calculate total buffer size - outputBufferSize = yuvPlaneLayouts[0].rowPitch * imageHeight; + outputBufferSize = static_cast(yuvPlaneLayouts[0].rowPitch * imageHeight); if (mpInfo->planesLayout.numberOfExtraPlanes >= 1) { - outputBufferSize += yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight; - outputBufferSize += yuvPlaneLayouts[2].rowPitch * 
secondaryPlaneHeight; + outputBufferSize += static_cast(yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight); + outputBufferSize += static_cast(yuvPlaneLayouts[2].rowPitch * secondaryPlaneHeight); } return outputBufferSize; @@ -545,6 +554,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { } VkDeviceSize imageMemorySize = imageResource->GetImageDeviceMemorySize(); + assert(imageMemorySize <= SIZE_MAX); // Ensure we don't lose data in conversion if ((m_pLinearMemory == nullptr) || (imageMemorySize > m_allocationSize)) { if (m_outputFile) { @@ -556,7 +566,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { m_pLinearMemory = nullptr; } - m_allocationSize = (size_t)(imageMemorySize); + m_allocationSize = static_cast(imageMemorySize); m_pLinearMemory = new uint8_t[m_allocationSize]; if (m_pLinearMemory == nullptr) { return nullptr; diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp index 906cc229..597f5d7c 100644 --- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp +++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp @@ -214,7 +214,7 @@ static YcbcrBtStandard GetYcbcrPrimariesConstantsId(VkSamplerYcbcrModelConversio * * @param shaderStr Output stringstream where the GLSL code will be written */ -void GenPushConstantsDecl(std::stringstream& shaderStr) { +static void GenPushConstantsDecl(std::stringstream& shaderStr) { shaderStr << "layout(push_constant) uniform PushConstants {\n" << " uint srcLayer; // src image layer to use\n" << " uint dstLayer; // dst image layer to use\n" diff --git a/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp b/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp index 20fc073e..89215a8b 100644 --- a/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp +++ b/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp @@ -103,13 +103,19 @@ VkShaderModule VulkanShaderCompiler::BuildShaderFromFile(const char *fileName, VkShaderStageFlagBits type, const 
VulkanDeviceContext* vkDevCtx) { +#ifdef seekg // read file from the path std::ifstream is(fileName, std::ios::binary | std::ios::in | std::ios::ate); if (is.is_open()) { - - size_t size = is.tellg(); - is.seekg(0, std::ios::beg); + is.seekg (0, is.end); + std::streamoff fileSize = is.tellg(); + if (fileSize < 0 || static_cast(fileSize) > std::numeric_limits::max()) { + std::cerr << "File size is too large or invalid" << std::endl; + return VK_NULL_HANDLE; + } + size_t size = static_cast(fileSize); + is.seekg(0, is.beg); char* shaderCode = new char[size]; is.read(shaderCode, size); is.close(); @@ -122,6 +128,7 @@ VkShaderModule VulkanShaderCompiler::BuildShaderFromFile(const char *fileName, return shaderModule; } +#endif return VK_NULL_HANDLE; } diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp index ebe00067..d6e1fd18 100644 --- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp +++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp @@ -115,11 +115,13 @@ VkResult VulkanVideoProcessor::Initialize(const VulkanDeviceContext* vkDevCtx, return result; } - VkVideoCoreProfile videoProfile(m_videoStreamDemuxer->GetVideoCodec(), - m_videoStreamDemuxer->GetChromaSubsampling(), - m_videoStreamDemuxer->GetLumaBitDepth(), - m_videoStreamDemuxer->GetChromaBitDepth(), - m_videoStreamDemuxer->GetProfileIdc()); + VkVideoCoreProfile videoProfile ({ + m_videoStreamDemuxer->GetVideoCodec(), + m_videoStreamDemuxer->GetChromaSubsampling(), + m_videoStreamDemuxer->GetLumaBitDepth(), + m_videoStreamDemuxer->GetChromaBitDepth(), + m_videoStreamDemuxer->GetProfileIdc() + }); if (!VulkanVideoCapabilities::IsCodecTypeSupported(vkDevCtx, vkDevCtx->GetVideoDecodeQueueFamilyIdx(), @@ -194,12 +196,11 @@ VkResult VulkanVideoProcessor::Create(const DecoderConfig& settings, const Vulka VkVideoProfileInfoKHR VulkanVideoProcessor::GetVkProfile() const { - - VkVideoProfileInfoKHR videoProfile({VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR, 
NULL, + VkVideoProfileInfoKHR videoProfile {VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR, NULL, m_videoStreamDemuxer->GetVideoCodec(), m_videoStreamDemuxer->GetChromaSubsampling(), m_videoStreamDemuxer->GetLumaBitDepth(), - m_videoStreamDemuxer->GetChromaBitDepth()}); + m_videoStreamDemuxer->GetChromaBitDepth()}; return videoProfile; } @@ -229,10 +230,10 @@ VkFormat VulkanVideoProcessor::GetFrameImageFormat() const VkExtent3D VulkanVideoProcessor::GetVideoExtent() const { - VkExtent3D extent ({ (uint32_t)m_videoStreamDemuxer->GetWidth(), - (uint32_t)m_videoStreamDemuxer->GetHeight(), - (uint32_t)1 - }); + VkExtent3D extent { (uint32_t)m_videoStreamDemuxer->GetWidth(), + (uint32_t)m_videoStreamDemuxer->GetHeight(), + (uint32_t)1 + }; return extent; } diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.h b/common/libs/VkCodecUtils/VulkanVideoProcessor.h index cbdca1f1..0eb08e9c 100644 --- a/common/libs/VkCodecUtils/VulkanVideoProcessor.h +++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.h @@ -23,6 +23,10 @@ #include "VkCodecUtils/VkVideoQueue.h" #include "VkVideoFrameOutput.h" +// Forward declarations +class VulkanDeviceContext; +struct VkMpFormatInfo; + class VulkanVideoProcessor : public VkVideoQueue { public: diff --git a/common/libs/VkShell/Shell.h b/common/libs/VkShell/Shell.h index c9c6c233..b91223b0 100644 --- a/common/libs/VkShell/Shell.h +++ b/common/libs/VkShell/Shell.h @@ -66,7 +66,10 @@ class Shell : public VkWsiDisplay, public VkVideoRefCountBase { if ((res != VK_SUCCESS) && (res != VK_SUBOPTIMAL_KHR)) { std::stringstream ss; ss << "VkResult " << res << " returned"; +#ifdef __cpp_exceptions throw std::runtime_error(ss.str()); +#endif // __cpp_exceptions + } return res; diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp index bc65f33f..c401eec1 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp +++ 
b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp @@ -1132,7 +1132,7 @@ static uint32_t tile_log2(int blk_size, int target) return k; } -uint32_t FloorLog2(uint32_t x) +static uint32_t FloorLog2(uint32_t x) { int s = 0; @@ -2289,7 +2289,11 @@ bool VulkanAV1Decoder::ParseObuTileGroup(const AV1ObuHeader& hdr) consumedBytes += tile_size_bytes_minus_1 + 1; m_PicData.tileOffsets[m_PicData.khr_info.tileCount] = (uint32_t)m_nalu.start_offset + (uint32_t)consumedBytes; - tileSize = tile_size_minus_1 + 1; + // Add bounds checking and safe conversion + if (tile_size_minus_1 > (SIZE_MAX - 1)) { + return false; // Tile size too large + } + tileSize = (size_t)(tile_size_minus_1 + 1); consumedBytes += (uint32_t)tileSize; skip_bits((uint32_t)(tileSize * 8)); @@ -2302,7 +2306,7 @@ bool VulkanAV1Decoder::ParseObuTileGroup(const AV1ObuHeader& hdr) return (tg_end == num_tiles - 1); } -bool IsObuInCurrentOperatingPoint(int current_operating_point, AV1ObuHeader *hdr) { +static bool IsObuInCurrentOperatingPoint(int current_operating_point, AV1ObuHeader *hdr) { if (current_operating_point == 0) return true; if (((current_operating_point >> hdr->temporal_id) & 0x1) && ((current_operating_point >> (hdr->spatial_id + 8)) & 0x1)) { diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp index e5a35316..37691fe5 100644 --- a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp +++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp @@ -82,7 +82,7 @@ #define WARP_PARAM_REDUCE_BITS 6 #define WARPEDMODEL_PREC_BITS 16 -int get_msb(unsigned int n) +static int get_msb(unsigned int n) { int log = 0; unsigned int value = n; diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp index 1d0e0541..f98f3f82 100644 --- a/vk_video_decoder/src/vulkan_video_decoder.cpp +++ 
b/vk_video_decoder/src/vulkan_video_decoder.cpp @@ -66,10 +66,11 @@ class VulkanVideoDecoderImpl : public VulkanVideoDecoder { virtual VkExtent3D GetVideoExtent() const { - VkExtent3D extent ({ (uint32_t)m_vulkanVideoProcessor->GetWidth(), - (uint32_t)m_vulkanVideoProcessor->GetHeight(), - (uint32_t)1 - }); + VkExtent3D extent { + (uint32_t)m_vulkanVideoProcessor->GetWidth(), + (uint32_t)m_vulkanVideoProcessor->GetHeight(), + 1 + }; return extent; } diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp index 31d24b2d..bb849f72 100644 --- a/vk_video_encoder/demos/vk-video-enc/Main.cpp +++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp @@ -21,7 +21,7 @@ #include "VkCodecUtils/VulkanEncoderFrameProcessor.h" #include "VkShell/Shell.h" -int main(int argc, char** argv) +int main(int argc, const char* argv[]) { VkSharedBaseObj encoderConfig; if (VK_SUCCESS != EncoderConfig::CreateCodecConfig(argc, argv, encoderConfig)) { diff --git a/vk_video_encoder/include/vulkan_video_encoder.h b/vk_video_encoder/include/vulkan_video_encoder.h index e757f238..f170fd4a 100644 --- a/vk_video_encoder/include/vulkan_video_encoder.h +++ b/vk_video_encoder/include/vulkan_video_encoder.h @@ -43,7 +43,7 @@ class VulkanVideoEncoder : public virtual VkVideoRefCountBase { public: virtual VkResult Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation, - int argc, char** argv) = 0; + int argc, const char** argv) = 0; virtual int64_t GetNumberOfFrames() = 0; virtual VkResult EncodeNextFrame(int64_t& frameNumEncoded) = 0; virtual VkResult GetBitstream() = 0; @@ -52,7 +52,7 @@ class VulkanVideoEncoder : public virtual VkVideoRefCountBase { extern "C" VK_VIDEO_ENCODER_EXPORT VkResult CreateVulkanVideoEncoder(VkVideoCodecOperationFlagBitsKHR videoCodecOperation, - int argc, char** argv, + int argc, const char** argv, VkSharedBaseObj& vulkanVideoEncoder); #endif /* _VULKAN_VIDEO_ENCODER_H_ */ diff --git 
a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp index 53d7cec3..fdfe92de 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp @@ -19,7 +19,7 @@ #include "VkVideoEncoder/VkEncoderConfigH265.h" #include "VkVideoEncoder/VkEncoderConfigAV1.h" -void printHelp(VkVideoCodecOperationFlagBitsKHR codec) +static void printHelp(VkVideoCodecOperationFlagBitsKHR codec) { fprintf(stderr, "Version: " VKVS_VERSION_STRING "\n"\ @@ -156,10 +156,10 @@ void printHelp(VkVideoCodecOperationFlagBitsKHR codec) } } -int EncoderConfig::ParseArguments(int argc, char *argv[]) +int EncoderConfig::ParseArguments(int argc, const char *argv[]) { int argcount = 0; - std::vector arglist; + std::vector arglist; std::vector args(argv, argv + argc); uint32_t frameCount = 0; @@ -572,7 +572,7 @@ int EncoderConfig::ParseArguments(int argc, char *argv[]) gopStructure.SetIntraRefreshSkippedStartIndex(intraRefreshSkippedStartIndex); } else { argcount++; - arglist.push_back((char*)args[i].c_str()); + arglist.push_back(args[i].c_str()); } } @@ -703,7 +703,7 @@ int EncoderConfig::ParseArguments(int argc, char *argv[]) return DoParseArguments(argcount, arglist.data()); } -VkResult EncoderConfig::CreateCodecConfig(int argc, char *argv[], +VkResult EncoderConfig::CreateCodecConfig(int argc, const char *argv[], VkSharedBaseObj& encoderConfig) { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h index 94adb438..896c1636 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h @@ -913,13 +913,13 @@ struct EncoderConfig : public VkVideoRefCountBase { } // Factory Function - static VkResult CreateCodecConfig(int argc, char *argv[], VkSharedBaseObj& encoderConfig); + static VkResult CreateCodecConfig(int argc, const char *argv[], 
VkSharedBaseObj& encoderConfig); void InitVideoProfile(); - int ParseArguments(int argc, char *argv[]); + int ParseArguments(int argc, const char *argv[]); - virtual int DoParseArguments(int argc, char *argv[]) { + virtual int DoParseArguments(int argc, const char *argv[]) { if (argc > 0) { std::cout << "Invalid paramters: "; for (int i = 0; i < argc; i++) { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp index aeab421d..c3ba67c1 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp @@ -26,7 +26,7 @@ } \ } -int EncoderConfigAV1::DoParseArguments(int argc, char* argv[]) +int EncoderConfigAV1::DoParseArguments(int argc, const char* argv[]) { // No validation of command line options. So, all options must be valid and // values with in the limits of vulkan and av1 specification diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h index 0838e2c8..622977d6 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h @@ -88,7 +88,7 @@ struct EncoderConfigAV1 : public EncoderConfig { } virtual ~EncoderConfigAV1() {} - virtual int DoParseArguments(int argc, char* argv[]) override; + virtual int DoParseArguments(int argc, const char* argv[]) override; virtual VkResult InitializeParameters() override { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h index a54bed3c..c828c3c7 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h @@ -139,7 +139,7 @@ class VkEncDpbH264 const StdVideoEncodeH264PictureInfo *GetCurrentDpbEntry(void) { assert((m_currDpbIdx < m_max_dpb_size) || (m_currDpbIdx == MAX_DPB_SLOTS)); - 
return &m_DPB[m_currDpbIdx].picInfo; + return &m_DPB[(int)m_currDpbIdx].picInfo; } uint32_t GetUpdatedFrameNumAndPicOrderCnt(int32_t& PicOrderCnt) diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 84e83dfa..5468511c 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -476,8 +476,10 @@ void VkVideoEncoder::CopyYCbCrPlanesDirectCPU( } // Source and destination strides - const size_t srcStride = inputPlaneLayouts[plane].rowPitch; - const size_t dstStride = dstSubresourceLayout[plane].rowPitch; + assert(inputPlaneLayouts[plane].rowPitch <= SIZE_MAX); + assert(dstSubresourceLayout[plane].rowPitch <= SIZE_MAX); + const size_t srcStride = (size_t)inputPlaneLayouts[plane].rowPitch; + const size_t dstStride = (size_t)dstSubresourceLayout[plane].rowPitch; // Line width in bytes const size_t lineBytes = planeWidth * bytesPerPixel; @@ -1449,7 +1451,9 @@ VkImageLayout VkVideoEncoder::TransitionImageLayout(VkCommandBuffer cmdBuf, imageBarrier.srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR; imageBarrier.dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR; } else { +#ifdef __cpp_exceptions throw std::invalid_argument("unsupported layout transition!"); +#endif } const VkDependencyInfoKHR dependencyInfo = { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h b/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h index d3b1ab0a..2ab76bcd 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h @@ -25,6 +25,7 @@ #include #include #include +#include // for std::min static const uint32_t MAX_GOP_SIZE = 64; @@ -207,15 +208,15 @@ class VkVideoGopStructure { uint32_t periodDelta = INT32_MAX; // the delta of this frame to the next closed GOP reference. 
-1 if it is not a B-frame if (framesLeft <= consecutiveBFrameCount) { // Handle last frames sequence - periodDelta = std::min(periodDelta, framesLeft); + periodDelta = std::min(periodDelta, framesLeft); } if (m_idrPeriod > 0) { // Is the IDR period valid - periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_idrPeriod)); + periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_idrPeriod)); } if (m_closedGop) { // A closed GOP is required. - periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_gopFrameCount)); + periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_gopFrameCount)); } uint32_t refDelta = INT32_MAX; // the delta of this frame from the last reference. -1 if it is not a B-frame diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp index 61c3637d..ae44f7ce 100644 --- a/vk_video_encoder/src/vulkan_video_encoder.cpp +++ b/vk_video_encoder/src/vulkan_video_encoder.cpp @@ -23,7 +23,7 @@ class VulkanVideoEncoderImpl : public VulkanVideoEncoder { public: virtual VkResult Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation, - int argc, char** argv); + int argc, const char** argv); virtual int64_t GetNumberOfFrames() { return m_encoderConfig->numFrames; @@ -81,7 +81,7 @@ class VulkanVideoEncoderImpl : public VulkanVideoEncoder { }; VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation, - int argc, char** argv) + int argc, const char** argv) { VkResult result = EncoderConfig::CreateCodecConfig(argc, argv, m_encoderConfig); if (VK_SUCCESS != result) { @@ -235,7 +235,7 @@ VkResult VulkanVideoEncoderImpl::EncodeNextFrame(int64_t& frameNumEncoded) VK_VIDEO_ENCODER_EXPORT VkResult CreateVulkanVideoEncoder(VkVideoCodecOperationFlagBitsKHR videoCodecOperation, - int argc, char** argv, + int argc, const char** argv, VkSharedBaseObj& vulkanVideoEncoder) { switch((uint32_t)videoCodecOperation) diff --git 
a/vk_video_encoder/test/vulkan-video-enc/Main.cpp b/vk_video_encoder/test/vulkan-video-enc/Main.cpp index 58c5cb49..09f55420 100644 --- a/vk_video_encoder/test/vulkan-video-enc/Main.cpp +++ b/vk_video_encoder/test/vulkan-video-enc/Main.cpp @@ -18,7 +18,7 @@ #include "vulkan_video_encoder.h" #include "VkVSCommon.h" -int main(int argc, char** argv) +int main(int argc, const char** argv) { std::cout << "Enter encoder test" << std::endl; VkSharedBaseObj vulkanVideoEncoder; From 804b94d3c8ea7240faa6aac143d9b42c8ad38c33 Mon Sep 17 00:00:00 2001 From: "Vassili Nikolaev (NVIDIA)" Date: Mon, 9 Jun 2025 09:13:35 -0500 Subject: [PATCH 05/14] common: Use the CRC generator instead of embedding it --- .../libs/VkCodecUtils/VkVideoFrameToFile.cpp | 75 +------------------ .../demos/vk-video-dec/CMakeLists.txt | 1 + .../test/vulkan-video-dec/CMakeLists.txt | 1 + .../vulkan-video-simple-dec/CMakeLists.txt | 1 + .../demos/vk-video-enc/CMakeLists.txt | 1 + 5 files changed, 5 insertions(+), 74 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index c5ca2d0e..0336a7f5 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -23,80 +23,7 @@ #include "VulkanDecodedFrame.h" #include "Helpers.h" #include "VkVideoFrameOutput.h" - -// CRC32 lookup table -static unsigned long Crc32Table[256] = { - 0x00000000,0x77073096,0xee0e612c,0x990951ba, - 0x076dc419,0x706af48f,0xe963a535,0x9e6495a3, - 0x0edb8832,0x79dcb8a4,0xe0d5e91e,0x97d2d988, - 0x09b64c2b,0x7eb17cbd,0xe7b82d07,0x90bf1d91, - 0x1db71064,0x6ab020f2,0xf3b97148,0x84be41de, - 0x1adad47d,0x6ddde4eb,0xf4d4b551,0x83d385c7, - 0x136c9856,0x646ba8c0,0xfd62f97a,0x8a65c9ec, - 0x14015c4f,0x63066cd9,0xfa0f3d63,0x8d080df5, - 0x3b6e20c8,0x4c69105e,0xd56041e4,0xa2677172, - 0x3c03e4d1,0x4b04d447,0xd20d85fd,0xa50ab56b, - 0x35b5a8fa,0x42b2986c,0xdbbbc9d6,0xacbcf940, - 0x32d86ce3,0x45df5c75,0xdcd60dcf,0xabd13d59, - 
0x26d930ac,0x51de003a,0xc8d75180,0xbfd06116, - 0x21b4f4b5,0x56b3c423,0xcfba9599,0xb8bda50f, - 0x2802b89e,0x5f058808,0xc60cd9b2,0xb10be924, - 0x2f6f7c87,0x58684c11,0xc1611dab,0xb6662d3d, - 0x76dc4190,0x01db7106,0x98d220bc,0xefd5102a, - 0x71b18589,0x06b6b51f,0x9fbfe4a5,0xe8b8d433, - 0x7807c9a2,0x0f00f934,0x9609a88e,0xe10e9818, - 0x7f6a0dbb,0x086d3d2d,0x91646c97,0xe6635c01, - 0x6b6b51f4,0x1c6c6162,0x856530d8,0xf262004e, - 0x6c0695ed,0x1b01a57b,0x8208f4c1,0xf50fc457, - 0x65b0d9c6,0x12b7e950,0x8bbeb8ea,0xfcb9887c, - 0x62dd1ddf,0x15da2d49,0x8cd37cf3,0xfbd44c65, - 0x4db26158,0x3ab551ce,0xa3bc0074,0xd4bb30e2, - 0x4adfa541,0x3dd895d7,0xa4d1c46d,0xd3d6f4fb, - 0x4369e96a,0x346ed9fc,0xad678846,0xda60b8d0, - 0x44042d73,0x33031de5,0xaa0a4c5f,0xdd0d7cc9, - 0x5005713c,0x270241aa,0xbe0b1010,0xc90c2086, - 0x5768b525,0x206f85b3,0xb966d409,0xce61e49f, - 0x5edef90e,0x29d9c998,0xb0d09822,0xc7d7a8b4, - 0x59b33d17,0x2eb40d81,0xb7bd5c3b,0xc0ba6cad, - 0xedb88320,0x9abfb3b6,0x03b6e20c,0x74b1d29a, - 0xead54739,0x9dd277af,0x04db2615,0x73dc1683, - 0xe3630b12,0x94643b84,0x0d6d6a3e,0x7a6a5aa8, - 0xe40ecf0b,0x9309ff9d,0x0a00ae27,0x7d079eb1, - 0xf00f9344,0x8708a3d2,0x1e01f268,0x6906c2fe, - 0xf762575d,0x806567cb,0x196c3671,0x6e6b06e7, - 0xfed41b76,0x89d32be0,0x10da7a5a,0x67dd4acc, - 0xf9b9df6f,0x8ebeeff9,0x17b7be43,0x60b08ed5, - 0xd6d6a3e8,0xa1d1937e,0x38d8c2c4,0x4fdff252, - 0xd1bb67f1,0xa6bc5767,0x3fb506dd,0x48b2364b, - 0xd80d2bda,0xaf0a1b4c,0x36034af6,0x41047a60, - 0xdf60efc3,0xa867df55,0x316e8eef,0x4669be79, - 0xcb61b38c,0xbc66831a,0x256fd2a0,0x5268e236, - 0xcc0c7795,0xbb0b4703,0x220216b9,0x5505262f, - 0xc5ba3bbe,0xb2bd0b28,0x2bb45a92,0x5cb36a04, - 0xc2d7ffa7,0xb5d0cf31,0x2cd99e8b,0x5bdeae1d, - 0x9b64c2b0,0xec63f226,0x756aa39c,0x026d930a, - 0x9c0906a9,0xeb0e363f,0x72076785,0x05005713, - 0x95bf4a82,0xe2b87a14,0x7bb12bae,0x0cb61b38, - 0x92d28e9b,0xe5d5be0d,0x7cdcefb7,0x0bdbdf21, - 0x86d3d2d4,0xf1d4e242,0x68ddb3f8,0x1fda836e, - 0x81be16cd,0xf6b9265b,0x6fb077e1,0x18b74777, - 
0x88085ae6,0xff0f6a70,0x66063bca,0x11010b5c, - 0x8f659eff,0xf862ae69,0x616bffd3,0x166ccf45, - 0xa00ae278,0xd70dd2ee,0x4e048354,0x3903b3c2, - 0xa7672661,0xd06016f7,0x4969474d,0x3e6e77db, - 0xaed16a4a,0xd9d65adc,0x40df0b66,0x37d83bf0, - 0xa9bcae53,0xdebb9ec5,0x47b2cf7f,0x30b5ffe9, - 0xbdbdf21c,0xcabac28a,0x53b39330,0x24b4a3a6, - 0xbad03605,0xcdd70693,0x54de5729,0x23d967bf, - 0xb3667a2e,0xc4614ab8,0x5d681b02,0x2a6f2b94, - 0xb40bbe37,0xc30c8ea1,0x5a05df1b,0x2d02ef8d -}; - -static void getCRC(uint32_t *checksum, const uint8_t *inputBytes, size_t length, unsigned long crcTable[]) { - for (size_t i = 0; i < length; i += 1) { - *checksum = crcTable[inputBytes[i] ^ (*checksum & 0xff)] ^ (*checksum >> 8); - } -} +#include "crcgenerator.h" // Rotate right for 16-bit unsigned integers. // Used to normalize MSB-aligned high bit-depth samples (10-bit, 12-bit) to LSB-aligned. diff --git a/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt b/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt index 30e3e4cd..5ebba8a3 100644 --- a/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt +++ b/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt @@ -50,6 +50,7 @@ set(sources ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanCommandBufferPool.cpp ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanCommandBufferPool.h ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VkVideoFrameToFile.cpp + ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp diff --git a/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt b/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt index 7f10d58f..084a6676 100644 --- a/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt +++ b/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt @@ -30,6 +30,7 @@ 
set(VULKAN_VIDEO_DEC_SOURCES ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanSamplerYcbcrConversion.h ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VkVideoFrameToFile.cpp + ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt b/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt index 30cf00be..d533f95e 100644 --- a/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt +++ b/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt @@ -1,6 +1,7 @@ set(VULKAN_VIDEO_SIMPLE_DEC_SOURCES Main.cpp ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp + ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp diff --git a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt index 7fe88cc3..33dfbc3e 100644 --- a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt +++ b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt @@ -80,6 +80,7 @@ set(sources ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanBistreamBufferImpl.h ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanBistreamBufferImpl.cpp + ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h 
${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp ) From cfb93a786600c3a29acc140ae8cba5f52fbd5fa4 Mon Sep 17 00:00:00 2001 From: "Vassili Nikolaev (NVIDIA)" Date: Mon, 9 Jun 2025 09:15:59 -0500 Subject: [PATCH 06/14] encoder: Deal with the Vulkan chained structures --- common/libs/VkCodecUtils/Helpers.h | 22 +++++++++--------- .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 23 ++++++++----------- .../libs/VkVideoEncoder/VkVideoEncoder.h | 2 +- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h index 333548e0..b74e71a3 100644 --- a/common/libs/VkCodecUtils/Helpers.h +++ b/common/libs/VkCodecUtils/Helpers.h @@ -320,7 +320,7 @@ inline VkResult WaitAndGetStatus(const VkInterfaceFunctions* vkIf, VkDevice devi } template -inline VkBaseInStructure* ChainNextVkStruct(NodeType& node, ChainedNodeType& nextChainedNode) { +inline void ChainNextVkStruct(NodeType& node, ChainedNodeType& nextChainedNode) { // make sure the node is of type VkBaseInStructure static_assert(offsetof(NodeType, sType) == offsetof(VkBaseInStructure, sType), "NodeType does not have sType at the same offset as VkBaseInStructure"); @@ -341,16 +341,16 @@ inline VkBaseInStructure* ChainNextVkStruct(NodeType& node, ChainedNodeType& nex "ChainedNodeType must be a standard-layout type"); assert(node.sType > 0); - VkBaseInStructure* pNode = (VkBaseInStructure*)&node; - while (pNode->pNext != nullptr) { - pNode = (VkBaseInStructure*)pNode->pNext; - } - pNode->pNext = (VkBaseInStructure*)&nextChainedNode; - // make sure the nextChainedNode is of type VkBaseInStructure - assert(nextChainedNode.sType > 0); - assert(nextChainedNode.pNext == nullptr); - return (VkBaseInStructure*)nextChainedNode.pNext; - } + VkBaseInStructure* pNode = (VkBaseInStructure*)(&node); + VkBaseInStructure* pNextNode = (VkBaseInStructure*)(&nextChainedNode); + + // The incoming object may not have anything chained. 
+ assert(pNextNode->pNext == nullptr); + + // Inserts the incoming object at the beginning of the list. + pNextNode->pNext = pNode->pNext; + pNode->pNext = pNextNode; +} class DeviceUuidUtils { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 5468511c..536ad489 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -1658,12 +1658,9 @@ VkResult VkVideoEncoder::HandleCtrlCmd(VkSharedBaseObj& encodeFrameInfo->qualityLevelInfo.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR; encodeFrameInfo->qualityLevelInfo.qualityLevel = encodeFrameInfo->qualityLevel; if (pNext != nullptr) { - if (encodeFrameInfo->rateControlInfo.pNext == nullptr) { - encodeFrameInfo->rateControlInfo.pNext = pNext; - } else { - ((VkBaseInStructure*)(encodeFrameInfo->rateControlInfo.pNext))->pNext = pNext; - } + vk::ChainNextVkStruct(encodeFrameInfo->rateControlInfo, *pNext); } + pNext = (VkBaseInStructure*)&encodeFrameInfo->qualityLevelInfo; } @@ -1686,12 +1683,9 @@ VkResult VkVideoEncoder::HandleCtrlCmd(VkSharedBaseObj& m_beginRateControlInfo = encodeFrameInfo->rateControlInfo; if (pNext != nullptr) { - if (encodeFrameInfo->rateControlInfo.pNext == nullptr) { - encodeFrameInfo->rateControlInfo.pNext = pNext; - } else { - ((VkBaseInStructure*)(encodeFrameInfo->rateControlInfo.pNext))->pNext = pNext; - } + vk::ChainNextVkStruct(encodeFrameInfo->rateControlInfo, *pNext); } + pNext = (VkBaseInStructure*)&encodeFrameInfo->rateControlInfo; } @@ -1771,7 +1765,8 @@ VkResult VkVideoEncoder::RecordVideoCodingCmd(VkSharedBaseObjCmdControlVideoCodingKHR(cmdBuf, &renderControlInfo); m_beginRateControlInfo = *(VkVideoEncodeRateControlInfoKHR*)encodeFrameInfo->pControlCmdChain; - ((VkBaseInStructure*)(m_beginRateControlInfo.pNext))->pNext = NULL; + // Do not walk the chain, otherwise we end up creating a loop here. 
+ m_beginRateControlInfo.pNext = (VkBaseInStructure*)(&encodeFrameInfo->pControlCmdChain); } if (m_videoMaintenance1FeaturesSupported) @@ -1783,10 +1778,12 @@ VkResult VkVideoEncoder::RecordVideoCodingCmd(VkSharedBaseObjencodeInfo; - while (pStruct->pNext) pStruct = (VkBaseInStructure*)pStruct->pNext; - pStruct->pNext = (VkBaseInStructure*)&videoInlineQueryInfoKHR; + vk::ChainNextVkStruct(*pStruct, videoInlineQueryInfoKHR); vkDevCtx->CmdEncodeVideoKHR(cmdBuf, &encodeFrameInfo->encodeInfo); + + // Remove the stack pointer from the chain, causes a use after free otherwise in GetEncodeFrameInfoH264 + encodeFrameInfo->encodeInfo.pNext = videoInlineQueryInfoKHR.pNext; } else { diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h index c939bda6..dacc2929 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h @@ -58,7 +58,7 @@ class VkVideoEncoder : public VkVideoRefCountBase { { VkStructureType GetType() { return (encodeInfo.pNext == nullptr) ? 
- VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR : ((VkBaseInStructure*)encodeInfo.pNext)->sType; + VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR : reinterpret_cast(encodeInfo.pNext)->sType; } VkVideoEncodeFrameInfo(const void* pNext = nullptr) From 990ca866e9f62b31ebddcd8e26e2b122c22e3d4e Mon Sep 17 00:00:00 2001 From: "Vassili Nikolaev (NVIDIA)" Date: Fri, 23 May 2025 08:50:47 -0700 Subject: [PATCH 07/14] common: Fix frame to file from adding .yuv to filenames with .y4m already Signed-off-by: Vassili Nikolaev (NVIDIA) --- common/libs/VkCodecUtils/VkVideoFrameToFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 0336a7f5..1ca1e2f6 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -218,7 +218,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { << std::endl; fileNameWithModExt = fileName + std::string(".y4m"); fileName = fileNameWithModExt.c_str(); - } else if (!hasExtension(fileName, ".yuv")) { + } else if ((y4mFormat == false) && !hasExtension(fileName, ".yuv")) { std::cout << std::endl << "Raw yuv output format is requested, "; std::cout << "but the output file's (" << fileName << ") extension isn't .yuv!" 
<< std::endl; From 0b14224410e58f1ae807ecc3dd3292b59a397713 Mon Sep 17 00:00:00 2001 From: "Vassili Nikolaev (NVIDIA)" Date: Tue, 17 Jun 2025 15:57:24 -0700 Subject: [PATCH 08/14] cmake: Add cast-qual to the cmake settings Signed-off-by: Vassili Nikolaev (NVIDIA) --- cmake/LinuxSettings.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/LinuxSettings.cmake b/cmake/LinuxSettings.cmake index a90e96ee..f9f3c727 100644 --- a/cmake/LinuxSettings.cmake +++ b/cmake/LinuxSettings.cmake @@ -84,7 +84,7 @@ endif() # Compiler flags for GCC/Clang if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang") - set(COMMON_COMPILE_FLAGS "-Wall -Wextra -Wundef -Wno-unused-parameter -Wno-missing-field-initializers -Wshadow") + set(COMMON_COMPILE_FLAGS "-Wall -Wextra -Wundef -Wno-unused-parameter -Wno-missing-field-initializers -Wshadow -Wcast-qual") set(COMMON_COMPILE_FLAGS "${COMMON_COMPILE_FLAGS} -fno-strict-aliasing -fno-builtin-memcmp") # Warning about implicit fallthrough in switch blocks From ec05fb5469779f1ae1f76ec45fc1c095aebe68c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Mon, 15 Dec 2025 17:34:56 +0100 Subject: [PATCH 09/14] EncoderConfig: fix override error to use const DoParseArguments changed with: a934d3b common: Code compilation fixes --- vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp | 2 +- vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h | 2 +- vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp | 2 +- vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp index 68829578..e9c94bed 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp @@ -17,7 +17,7 @@ #include 
"VkVideoEncoder/VkEncoderConfigH264.h" #include "VkVideoEncoder/VkVideoEncoderH264.h" -int EncoderConfigH264::DoParseArguments(int argc, char* argv[]) +int EncoderConfigH264::DoParseArguments(int argc, const char* argv[]) { std::vector args(argv, argv + argc); diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h index fb1c0611..6d8865a5 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h @@ -156,7 +156,7 @@ struct EncoderConfigH264 : public EncoderConfig { const LevelLimits* levelLimits; size_t levelLimitsSize; - virtual int DoParseArguments(int argc, char* argv[]) override; + virtual int DoParseArguments(int argc, const char* argv[]) override; StdVideoH264LevelIdc DetermineLevel(uint8_t dpbSize, uint32_t bitrate, diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp index b4a03ce1..33bcc53e 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp @@ -68,7 +68,7 @@ uint32_t EncoderConfigH265::GetCpbVclFactor() return baseFactor + depthFactor; } -int EncoderConfigH265::DoParseArguments(int argc, char* argv[]) +int EncoderConfigH265::DoParseArguments(int argc, const char* argv[]) { std::vector args(argv, argv + argc); diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h index ebc5ca38..774bf1a9 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h +++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h @@ -135,7 +135,7 @@ struct EncoderConfigH265 : public EncoderConfig { return this; } - virtual int DoParseArguments(int argc, char* argv[]) override; + virtual int DoParseArguments(int argc, const char* argv[]) override; uint32_t 
GetCtbAlignedPicSizeInSamples(uint32_t& picWidthInCtbsY, uint32_t& picHeightInCtbsY, bool minCtbsY = false); From fb17b8e25612c0f33fd0a8dc31d0791cd81d66c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Mon, 15 Dec 2025 17:36:50 +0100 Subject: [PATCH 10/14] VulkanFilterYuvCompute: fix shadowed struct declarations Use explicit struct names to avoid local PushConstants struct definitions from shadowing the class member PushConstants. --- .../VkCodecUtils/VulkanFilterYuvCompute.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp index 597f5d7c..64582775 100644 --- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp +++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp @@ -2474,7 +2474,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} }; - struct PushConstants { + struct ImagePushConstants { uint32_t srcLayer; uint32_t dstLayer; ivec2 inputSize; @@ -2487,7 +2487,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, uint32_t crPitch; // Cr plane pitch }; - PushConstants pushConstants = { + ImagePushConstants pushConstants = { inImageResourceInfo->baseArrayLayer, // Set the source layer index outImageResourceInfo->baseArrayLayer, // Set the destination layer index ivec2(inImageResourceInfo->codedExtent.width, inImageResourceInfo->codedExtent.height), @@ -2504,7 +2504,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, m_descriptorSetLayout.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, - sizeof(PushConstants), + sizeof(ImagePushConstants), &pushConstants); const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; @@ -2625,7 +2625,7 @@ VkResult 
VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} }; - struct PushConstants { + struct BufferToImagePushConstants { uint32_t srcLayer; uint32_t dstLayer; ivec2 inputSize; @@ -2657,7 +2657,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, VkDeviceSize cbOffset = yOffset + planeSize; VkDeviceSize crOffset = cbOffset + (planeSize / 4); - PushConstants pushConstants = { + BufferToImagePushConstants pushConstants = { pBufferImageCopy->imageSubresource.baseArrayLayer, outImageResourceInfo->baseArrayLayer, ivec2(width, height), @@ -2674,7 +2674,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, m_descriptorSetLayout.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, - sizeof(PushConstants), + sizeof(BufferToImagePushConstants), &pushConstants); const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; @@ -2792,7 +2792,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} }; - struct PushConstants { + struct ImageToBufferPushConstants { uint32_t srcLayer; uint32_t dstLayer; ivec2 inputSize; @@ -2828,7 +2828,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, VkDeviceSize cbOffset = yOffset + planeSize; VkDeviceSize crOffset = cbOffset + (planeSize / 4); - PushConstants pushConstants = { + ImageToBufferPushConstants pushConstants = { inImageResourceInfo->baseArrayLayer, 0, // Destination layer (buffer has no layers) ivec2(inputExtent.width, inputExtent.height), @@ -2845,7 +2845,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, m_descriptorSetLayout.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, - sizeof(PushConstants), + sizeof(ImageToBufferPushConstants), &pushConstants); const uint32_t 
workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; @@ -2965,7 +2965,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {} }; - struct PushConstants { + struct BufferToBufferPushConstants { uint32_t srcLayer; // src image layer to use uint32_t dstLayer; // dst image layer to use ivec2 inputSize; // input image or buffer extent @@ -2991,7 +2991,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, VkDeviceSize cbOffset = planeSize; VkDeviceSize crOffset = cbOffset + (planeSize / 4); - PushConstants pushConstants = { + BufferToBufferPushConstants pushConstants = { 0, // Source layer (buffer has no layers) 0, // Destination layer (buffer has no layers) ivec2(inBufferExtent.width, inBufferExtent.height), @@ -3008,7 +3008,7 @@ VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf, m_descriptorSetLayout.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, - sizeof(PushConstants), + sizeof(BufferToBufferPushConstants), &pushConstants); const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX; From 8bcd876498bee2ff0a104670350fbfbd93d6fd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Tue, 16 Dec 2025 17:10:01 +0100 Subject: [PATCH 11/14] VulkanFilterYuvCompute: fix unused variable numPlanes numPlanes will be used only in Debug as assert are disabled in release. 
This is a fixup of "encode: remove the CPU input conversion function" --- common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp index 64582775..dcea5b41 100644 --- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp +++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp @@ -2327,7 +2327,7 @@ uint32_t VulkanFilterYuvCompute::UpdateImageDescriptorSets( validImageAspects &= validAspects; uint32_t curImageAspect = 0; - const uint32_t numPlanes = imageView->GetNumberOfPlanes(); + [[maybe_unused]] const uint32_t numPlanes = imageView->GetNumberOfPlanes(); while(validImageAspects) { if (validImageAspects & (VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect) ) { From 18d19ae736c18746ae66471c7fc767c2785a8fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Fri, 20 Feb 2026 16:04:14 +0100 Subject: [PATCH 12/14] common: Code compilation fixes(khr fixes) This fix allows to pass 10 bits decode tests --- common/libs/VkCodecUtils/VkVideoFrameToFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp index 1ca1e2f6..6285d2aa 100644 --- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp +++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp @@ -430,7 +430,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { assert(layouts[plane].rowPitch <= SIZE_MAX); assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); CopyPlaneData(pSrc, pDst, static_cast(layouts[plane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), - frameWidth, imageHeight); + frameWidth, imageHeight, 1, bitShift); } } @@ -457,7 +457,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput { assert(layouts[srcPlane].rowPitch <= SIZE_MAX); assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX); 
CopyPlaneData(pSrc, pDst, static_cast(layouts[srcPlane].rowPitch), static_cast(yuvPlaneLayouts[plane].rowPitch), - planeWidth, 1, 2); + planeWidth, 1, 2, bitShift); } pDst += yuvPlaneLayouts[plane].rowPitch; } From c8ac37ae244b298c1748680e920e04b6a8d5aaed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Thu, 19 Feb 2026 17:53:19 +0100 Subject: [PATCH 13/14] encoder: restore CPU fallback and support 3-plane image copy When SHADERC is not enabled, the GPU compute filter is compiled out and m_inputComputeFilter stays nullptr. Restore the CPU I420-to-NV12 and I444-to-P444 conversion path from YCbCrConvUtilsCpu as a fallback, and configure the linear staging image pool with m_imageInFormat (hardware 2-plane format) instead of the raw input format. Additionally, extend CopyLinearToOptimalImage to handle 3-plane formats dynamically instead of asserting that only 2-plane formats are supported. --- .../libs/VkVideoEncoder/VkVideoEncoder.cpp | 131 ++++++++++++++++-- 1 file changed, 117 insertions(+), 14 deletions(-) diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp index 536ad489..84d96deb 100644 --- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp +++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp @@ -167,16 +167,94 @@ VkResult VkVideoEncoder::LoadNextFrame(VkSharedBaseObj& // NOTE: Get image layout const VkSubresourceLayout* dstSubresourceLayout = dstImageResource->GetSubresourceLayout(); - // Direct plane copy - no color space conversion needed - CopyYCbCrPlanesDirectCPU( - pInputFrameData, // Source buffer - m_encoderConfig->input.planeLayouts, // Source layouts - writeImagePtr, // Destination buffer - dstSubresourceLayout, // Destination layouts - std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width), // Width - std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height), // Height - m_encoderConfig->input.numPlanes, // 
Number of planes - m_encoderConfig->input.vkFormat); // Format for subsampling detection + const uint32_t width = std::min(m_encoderConfig->encodeWidth, m_encoderConfig->input.width); + const uint32_t height = std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height); + + if (m_inputComputeFilter != nullptr) { + // Compute filter available: direct plane copy, GPU filter handles conversion + CopyYCbCrPlanesDirectCPU( + pInputFrameData, // Source buffer + m_encoderConfig->input.planeLayouts, // Source layouts + writeImagePtr, // Destination buffer + dstSubresourceLayout, // Destination layouts + width, height, + m_encoderConfig->input.numPlanes, // Number of planes + m_encoderConfig->input.vkFormat); // Format for subsampling detection + } else { + // No compute filter: CPU conversion from 3-plane to 2-plane format + int yCbCrConvResult = 0; + if (m_encoderConfig->input.bpp == 8) { + if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) { + yCbCrConvResult = YCbCrConvUtilsCpu::I444ToP444( + pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset, + (int)m_encoderConfig->input.planeLayouts[0].rowPitch, + pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset, + (int)m_encoderConfig->input.planeLayouts[1].rowPitch, + pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset, + (int)m_encoderConfig->input.planeLayouts[2].rowPitch, + writeImagePtr + dstSubresourceLayout[0].offset, + (int)dstSubresourceLayout[0].rowPitch, + writeImagePtr + dstSubresourceLayout[1].offset, + (int)dstSubresourceLayout[1].rowPitch, + width, height); + } else { + yCbCrConvResult = YCbCrConvUtilsCpu::I420ToNV12( + pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset, + (int)m_encoderConfig->input.planeLayouts[0].rowPitch, + pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset, + (int)m_encoderConfig->input.planeLayouts[1].rowPitch, + pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset, + 
(int)m_encoderConfig->input.planeLayouts[2].rowPitch, + writeImagePtr + dstSubresourceLayout[0].offset, + (int)dstSubresourceLayout[0].rowPitch, + writeImagePtr + dstSubresourceLayout[1].offset, + (int)dstSubresourceLayout[1].rowPitch, + width, height); + } + } else if (m_encoderConfig->input.bpp == 10 || m_encoderConfig->input.bpp == 12) { + int shiftBits = 0; + if (m_encoderConfig->input.msbShift >= 0) { + shiftBits = m_encoderConfig->input.msbShift; + } else { + shiftBits = 16 - m_encoderConfig->input.bpp; + } + + if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) { + yCbCrConvResult = YCbCrConvUtilsCpu::I444ToP444( + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), + (int)m_encoderConfig->input.planeLayouts[0].rowPitch, + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), + (int)m_encoderConfig->input.planeLayouts[1].rowPitch, + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), + (int)m_encoderConfig->input.planeLayouts[2].rowPitch, + (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset), + (int)dstSubresourceLayout[0].rowPitch, + (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset), + (int)dstSubresourceLayout[1].rowPitch, + width, height, shiftBits); + } else { + yCbCrConvResult = YCbCrConvUtilsCpu::I420ToNV12( + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), + (int)m_encoderConfig->input.planeLayouts[0].rowPitch, + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), + (int)m_encoderConfig->input.planeLayouts[1].rowPitch, + (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), + (int)m_encoderConfig->input.planeLayouts[2].rowPitch, + (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset), + (int)dstSubresourceLayout[0].rowPitch, + (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset), + 
(int)dstSubresourceLayout[1].rowPitch, + width, height, shiftBits); + } + } else { + assert(!"Requested bit-depth is not supported!"); + return VK_ERROR_INITIALIZATION_FAILED; + } + + if (yCbCrConvResult != 0) { + return VK_ERROR_INITIALIZATION_FAILED; + } + } // Now stage the input frame for the encoder video input return StageInputFrame(encodeFrameInfo); @@ -998,9 +1076,21 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj& encoderConf std::max(m_maxCodedExtent.height, encoderConfig->input.height) }; + // When compute filter is available, the linear image stores raw input format + // and the filter handles conversion. Without it, the linear image must match + // the encode source format since CopyLinearToOptimalImage does no conversion. + const VkFormat linearImageFormat = +#ifdef SHADERC_SUPPORT + encoderConfig->enablePreprocessComputeFilter + ? encoderConfig->input.vkFormat + : m_imageInFormat; +#else + m_imageInFormat; +#endif + result = m_linearInputImagePool->Configure( m_vkDevCtx, encoderConfig->numInputImages, - encoderConfig->input.vkFormat, + linearImageFormat, linearInputImageExtent, ( VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -1497,8 +1587,9 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer // Bind memory for the image. const VkMpFormatInfo* mpInfo = YcbcrVkFormatInfo(format); - // Currently formats that have more than 2 output planes are not supported. 444 formats have a shared CbCr planes in all current tests - assert((mpInfo->vkPlaneFormat[2] == VK_FORMAT_UNDEFINED) && (mpInfo->vkPlaneFormat[3] == VK_FORMAT_UNDEFINED)); + // Determine number of planes: 1 (base) + numberOfExtraPlanes + const uint32_t numPlanes = 1 + mpInfo->planesLayout.numberOfExtraPlanes; + assert(numPlanes >= 1 && numPlanes <= 3); // Copy src buffer to image. 
VkImageCopy copyRegion[3]{}; @@ -1533,9 +1624,21 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer copyRegion[1].dstSubresource.baseArrayLayer = dstCopyArrayLayer; copyRegion[1].dstSubresource.layerCount = 1; + if (numPlanes > 2) { + copyRegion[2].extent = copyRegion[1].extent; + copyRegion[2].srcSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_2_BIT; + copyRegion[2].srcSubresource.mipLevel = 0; + copyRegion[2].srcSubresource.baseArrayLayer = srcCopyArrayLayer; + copyRegion[2].srcSubresource.layerCount = 1; + copyRegion[2].dstSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_2_BIT; + copyRegion[2].dstSubresource.mipLevel = 0; + copyRegion[2].dstSubresource.baseArrayLayer = dstCopyArrayLayer; + copyRegion[2].dstSubresource.layerCount = 1; + } + m_vkDevCtx->CmdCopyImage(commandBuffer, srcImageResource->GetImage(), srcImageLayout, dstImageResource->GetImage(), dstImageLayout, - (uint32_t)2, copyRegion); + numPlanes, copyRegion); { VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER}; From 3a6646720697c7b2d8e7fdbfccaeba76aca1d2a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Cerveau?= Date: Fri, 20 Feb 2026 16:09:47 +0100 Subject: [PATCH 14/14] CMake: enable USE_ENCODER_SHADERC by default and skip search when OFF USE_ENCODER_SHADERC was never declared as an option, so it defaulted to OFF unless explicitly passed on the command line. Declare it as an option with ON as the default so the GPU compute filter path is always built. When USE_ENCODER_SHADERC=OFF and encoder only, skip FindShaderc.cmake entirely to avoid unnecessary dependency searches. 
--- CMakeLists.txt | 5 ++++- vk_video_encoder/demos/vk-video-enc/CMakeLists.txt | 2 +- vk_video_encoder/libs/CMakeLists.txt | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 50a16a32..b2ef28f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,10 @@ set (VULKAN_SDK_MIN_MINOR_VERSION 4) set (VULKAN_SDK_MIN_PATCH_VERSION 321) FIND_VULKAN_SDK(${VULKAN_SDK_MIN_MAJOR_VERSION} ${VULKAN_SDK_MIN_MINOR_VERSION} ${VULKAN_SDK_MIN_PATCH_VERSION}) -include(FindShaderc) +option(USE_ENCODER_SHADERC "Enable shaderc GPU compute filters for encoder (e.g. YUV conversion). Only affects the encoder build; the decoder always uses shaderc." ON) +if(BUILD_DECODER OR USE_ENCODER_SHADERC) + include(FindShaderc) +endif() ############ VULKAN_FFMPEG_LIB_PATH ###################################### if (DEFINED ENV{VULKAN_FFMPEG_LIB_DIR_PATH}) diff --git a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt index 33dfbc3e..b043412b 100644 --- a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt +++ b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt @@ -119,7 +119,7 @@ if(TARGET vulkan) list(APPEND definitions PRIVATE -DUNINSTALLED_LOADER="$") endif() -if(USE_SHADERC) +if(USE_ENCODER_SHADERC) list(APPEND definitions PRIVATE -DSHADERC_SUPPORT) endif() diff --git a/vk_video_encoder/libs/CMakeLists.txt b/vk_video_encoder/libs/CMakeLists.txt index 5cca8809..66685d33 100644 --- a/vk_video_encoder/libs/CMakeLists.txt +++ b/vk_video_encoder/libs/CMakeLists.txt @@ -88,7 +88,7 @@ set(LIBVKVIDEOENCODER_DEFINITIONS PRIVATE VK_VIDEO_ENCODER_IMPLEMENTATION PUBLIC VK_VIDEO_ENCODER_SHAREDLIB) -if(USE_SHADERC) +if(USE_ENCODER_SHADERC) list(APPEND LIBVKVIDEOENCODER_SRC ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanShaderCompiler.cpp ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanShaderCompiler.h @@ -108,7 +108,7 @@ include_directories(BEFORE 
${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}) set(LIBVKVIDEOENCODER_DEPENDENCIES GenerateDispatchTables ${VULKAN_VIDEO_PARSER_LIB}) add_library(${VULKAN_VIDEO_ENCODER_LIB} SHARED ${LIBVKVIDEOENCODER_SRC}) -if(USE_SHADERC) +if(USE_ENCODER_SHADERC) # Link the libraries target_link_libraries(${VULKAN_VIDEO_ENCODER_LIB} PUBLIC ${SHADERC_SHARED_LIBRARY}) # Ensure the library depends on the generation of these files @@ -137,7 +137,7 @@ if(WIN32) endif() add_library(${VULKAN_VIDEO_ENCODER_STATIC_LIB} STATIC ${LIBVKVIDEOENCODER_SRC}) -if(USE_SHADERC) +if(USE_ENCODER_SHADERC) # Link the libraries target_link_libraries(${VULKAN_VIDEO_ENCODER_STATIC_LIB} PUBLIC ${SHADERC_SHARED_LIBRARY}) endif()