diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50a16a32..b2ef28f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,7 +54,10 @@ set (VULKAN_SDK_MIN_MINOR_VERSION 4)
 set (VULKAN_SDK_MIN_PATCH_VERSION 321)
 FIND_VULKAN_SDK(${VULKAN_SDK_MIN_MAJOR_VERSION} ${VULKAN_SDK_MIN_MINOR_VERSION} ${VULKAN_SDK_MIN_PATCH_VERSION})
 
-include(FindShaderc)
+option(USE_ENCODER_SHADERC "Enable shaderc GPU compute filters for encoder (e.g. YUV conversion). Only affects the encoder build; the decoder always uses shaderc." ON)
+if(BUILD_DECODER OR USE_ENCODER_SHADERC)
+    include(FindShaderc)
+endif()
 
 ############ VULKAN_FFMPEG_LIB_PATH ######################################
 if (DEFINED ENV{VULKAN_FFMPEG_LIB_DIR_PATH})
diff --git a/cmake/LinuxSettings.cmake b/cmake/LinuxSettings.cmake
index a90e96ee..f9f3c727 100644
--- a/cmake/LinuxSettings.cmake
+++ b/cmake/LinuxSettings.cmake
@@ -84,7 +84,7 @@ endif()
 
 # Compiler flags for GCC/Clang
 if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-    set(COMMON_COMPILE_FLAGS "-Wall -Wextra -Wundef -Wno-unused-parameter -Wno-missing-field-initializers -Wshadow")
+    set(COMMON_COMPILE_FLAGS "-Wall -Wextra -Wundef -Wno-unused-parameter -Wno-missing-field-initializers -Wshadow -Wcast-qual")
     set(COMMON_COMPILE_FLAGS "${COMMON_COMPILE_FLAGS} -fno-strict-aliasing -fno-builtin-memcmp")
 
     # Warning about implicit fallthrough in switch blocks
diff --git a/common/include/VkVideoCore/VulkanVideoCapabilities.h b/common/include/VkVideoCore/VulkanVideoCapabilities.h
index 8e0caf4f..3c8f572f 100644
--- a/common/include/VkVideoCore/VulkanVideoCapabilities.h
+++ b/common/include/VkVideoCore/VulkanVideoCapabilities.h
@@ -360,7 +360,7 @@ class VulkanVideoCapabilities
             }
         }
 
-        formatCount = std::min(supportedFormatCount, formatCount);
+        formatCount = std::min<uint32_t>(supportedFormatCount, formatCount);
 
         for (uint32_t i = 0; i < formatCount; i++) {
             formats[i] = pSupportedFormats[i].format;
diff --git a/common/include/mio/mio.hpp b/common/include/mio/mio.hpp
index 5cd55ea8..3c3e1adb 100644
--- a/common/include/mio/mio.hpp
+++ b/common/include/mio/mio.hpp
@@ -786,13 +786,13 @@ namespace win {
 /** Returns the 4 upper bytes of an 8-byte integer. */
 inline DWORD int64_high(int64_t n) noexcept
 {
-    return n >> 32;
+    return (DWORD)(n >> 32);
 }
 
 /** Returns the 4 lower bytes of an 8-byte integer. */
 inline DWORD int64_low(int64_t n) noexcept
 {
-    return n & 0xffffffff;
+    return (DWORD)(n & 0xffffffff);
 }
 
 inline std::wstring s_2_ws(const std::string& s)
@@ -887,7 +887,7 @@ inline size_t query_file_size(file_handle_type handle, std::error_code& error)
         error = detail::last_error();
         return 0;
     }
-	return static_cast<int64_t>(file_size.QuadPart);
+	return static_cast<size_t>(file_size.QuadPart);
 #else // POSIX
     struct stat sbuf;
     if(::fstat(handle, &sbuf) == -1)
@@ -933,7 +933,7 @@ inline mmap_context memory_map(const file_handle_type file_handle, const int64_t
             mode == access_mode::read ? FILE_MAP_READ : FILE_MAP_WRITE,
             win::int64_high(aligned_offset),
             win::int64_low(aligned_offset),
-            length_to_map));
+            (size_t)length_to_map));
     if(mapping_start == nullptr)
     {
         // Close file handle if mapping it failed.
diff --git a/common/include/nvidia_utils/vulkan/ycbcr_utils.h b/common/include/nvidia_utils/vulkan/ycbcr_utils.h
index 7713c1e7..46f3ed78 100644
--- a/common/include/nvidia_utils/vulkan/ycbcr_utils.h
+++ b/common/include/nvidia_utils/vulkan/ycbcr_utils.h
@@ -103,6 +103,24 @@ typedef struct YcbcrPlanesLayoutInfo {
     uint8_t              reserved;                    // reserved for structure alignment.
 } YcbcrPlanesLayoutInfo;
 
+// Converts the bit-depth code stored in YcbcrPlanesLayoutInfo::bpp into the
+// number of bits per channel. Codes without a dedicated case map to 8.
+static inline uint32_t GetBitsPerChannel(const YcbcrPlanesLayoutInfo& pYcbcrPlanesLayoutInfo)
+{
+    uint32_t bitsPerChannel = 8; // value for YCBCRA_8BPP and for unknown codes
+
+    switch (pYcbcrPlanesLayoutInfo.bpp) {
+        case YCBCRA_8BPP:  bitsPerChannel = 8;  break;
+        case YCBCRA_10BPP: bitsPerChannel = 10; break;
+        case YCBCRA_12BPP: bitsPerChannel = 12; break;
+        case YCBCRA_14BPP: bitsPerChannel = 14; break;
+        case YCBCRA_16BPP: bitsPerChannel = 16; break;
+        default:           break;
+    }
+
+    return bitsPerChannel;
+}
+
 static inline size_t YcbcrAlign(size_t toAlign, size_t alignment)
 {
     return ((toAlign + (alignment - 1)) & ~(alignment -1));
diff --git a/common/libs/VkCodecUtils/FrameProcessor.h b/common/libs/VkCodecUtils/FrameProcessor.h
index 8a94f6ab..097a3fa6 100644
--- a/common/libs/VkCodecUtils/FrameProcessor.h
+++ b/common/libs/VkCodecUtils/FrameProcessor.h
@@ -106,7 +106,7 @@ class FrameProcessor : public VkVideoRefCountBase {
     FrameProcessor(bool verbose = false)
         : m_frameCount(0)
         , m_profileFramesCount(0)
-        , m_displayTimePeriodMilliseconds(1000)
+        , m_displayTimePeriodMilliseconds(100)
         , start_time (std::chrono::steady_clock::now())
         , m_verbose(verbose)
     {
diff --git a/common/libs/VkCodecUtils/Helpers.h b/common/libs/VkCodecUtils/Helpers.h
index 333548e0..b74e71a3 100644
--- a/common/libs/VkCodecUtils/Helpers.h
+++ b/common/libs/VkCodecUtils/Helpers.h
@@ -320,7 +320,7 @@ inline VkResult WaitAndGetStatus(const VkInterfaceFunctions* vkIf, VkDevice devi
  }
 
 template<typename NodeType, typename ChainedNodeType>
-inline VkBaseInStructure* ChainNextVkStruct(NodeType& node, ChainedNodeType& nextChainedNode) {
+inline void ChainNextVkStruct(NodeType& node, ChainedNodeType& nextChainedNode) {
     // make sure the node is of type VkBaseInStructure
     static_assert(offsetof(NodeType, sType) == offsetof(VkBaseInStructure, sType),
                   "NodeType does not have sType at the same offset as VkBaseInStructure");
@@ -341,16 +341,16 @@ inline VkBaseInStructure* ChainNextVkStruct(NodeType& node, ChainedNodeType& nex
                   "ChainedNodeType must be a standard-layout type");
 
     assert(node.sType > 0);
-    VkBaseInStructure* pNode = (VkBaseInStructure*)&node;
-    while (pNode->pNext != nullptr) {
-         pNode = (VkBaseInStructure*)pNode->pNext;
-     }
-     pNode->pNext = (VkBaseInStructure*)&nextChainedNode;
-     // make sure the nextChainedNode is of type VkBaseInStructure
-     assert(nextChainedNode.sType > 0);
-     assert(nextChainedNode.pNext == nullptr);
-     return (VkBaseInStructure*)nextChainedNode.pNext;
- }
+    VkBaseInStructure* pNode = (VkBaseInStructure*)(&node);
+    VkBaseInStructure* pNextNode = (VkBaseInStructure*)(&nextChainedNode);
+
+    // Precondition: the incoming struct must not already have a pNext chain of its own.
+    assert(pNextNode->pNext == nullptr);
+
+    // Insert the incoming struct directly after node, ahead of anything node already chains.
+    pNextNode->pNext = pNode->pNext;
+    pNode->pNext = pNextNode;
+}
 
 class DeviceUuidUtils
 {
diff --git a/common/libs/VkCodecUtils/VkThreadPool.h b/common/libs/VkCodecUtils/VkThreadPool.h
index 44d31bd1..b9d5a508 100644
--- a/common/libs/VkCodecUtils/VkThreadPool.h
+++ b/common/libs/VkCodecUtils/VkThreadPool.h
@@ -65,8 +65,11 @@ class VkThreadPool
         std::future<return_type> res = task->get_future();
         {
             std::unique_lock<std::mutex> lock(queue_mutex);
-            if(stop)
+            if(stop) {
+#ifdef __cpp_exceptions
                 throw std::runtime_error("enqueue on stopped ThreadPool");
+#endif
+            }
 
             tasks.emplace([task](){ (*task)(); });
         }
diff --git a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
index cb71ccb6..6285d2aa 100644
--- a/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
+++ b/common/libs/VkCodecUtils/VkVideoFrameToFile.cpp
@@ -23,80 +23,7 @@
 #include "VulkanDecodedFrame.h"
 #include "Helpers.h"
 #include "VkVideoFrameOutput.h"
-
-// CRC32 lookup table
-static unsigned long Crc32Table[256] = {
-    0x00000000,0x77073096,0xee0e612c,0x990951ba,
-    0x076dc419,0x706af48f,0xe963a535,0x9e6495a3,
-    0x0edb8832,0x79dcb8a4,0xe0d5e91e,0x97d2d988,
-    0x09b64c2b,0x7eb17cbd,0xe7b82d07,0x90bf1d91,
-    0x1db71064,0x6ab020f2,0xf3b97148,0x84be41de,
-    0x1adad47d,0x6ddde4eb,0xf4d4b551,0x83d385c7,
-    0x136c9856,0x646ba8c0,0xfd62f97a,0x8a65c9ec,
-    0x14015c4f,0x63066cd9,0xfa0f3d63,0x8d080df5,
-    0x3b6e20c8,0x4c69105e,0xd56041e4,0xa2677172,
-    0x3c03e4d1,0x4b04d447,0xd20d85fd,0xa50ab56b,
-    0x35b5a8fa,0x42b2986c,0xdbbbc9d6,0xacbcf940,
-    0x32d86ce3,0x45df5c75,0xdcd60dcf,0xabd13d59,
-    0x26d930ac,0x51de003a,0xc8d75180,0xbfd06116,
-    0x21b4f4b5,0x56b3c423,0xcfba9599,0xb8bda50f,
-    0x2802b89e,0x5f058808,0xc60cd9b2,0xb10be924,
-    0x2f6f7c87,0x58684c11,0xc1611dab,0xb6662d3d,
-    0x76dc4190,0x01db7106,0x98d220bc,0xefd5102a,
-    0x71b18589,0x06b6b51f,0x9fbfe4a5,0xe8b8d433,
-    0x7807c9a2,0x0f00f934,0x9609a88e,0xe10e9818,
-    0x7f6a0dbb,0x086d3d2d,0x91646c97,0xe6635c01,
-    0x6b6b51f4,0x1c6c6162,0x856530d8,0xf262004e,
-    0x6c0695ed,0x1b01a57b,0x8208f4c1,0xf50fc457,
-    0x65b0d9c6,0x12b7e950,0x8bbeb8ea,0xfcb9887c,
-    0x62dd1ddf,0x15da2d49,0x8cd37cf3,0xfbd44c65,
-    0x4db26158,0x3ab551ce,0xa3bc0074,0xd4bb30e2,
-    0x4adfa541,0x3dd895d7,0xa4d1c46d,0xd3d6f4fb,
-    0x4369e96a,0x346ed9fc,0xad678846,0xda60b8d0,
-    0x44042d73,0x33031de5,0xaa0a4c5f,0xdd0d7cc9,
-    0x5005713c,0x270241aa,0xbe0b1010,0xc90c2086,
-    0x5768b525,0x206f85b3,0xb966d409,0xce61e49f,
-    0x5edef90e,0x29d9c998,0xb0d09822,0xc7d7a8b4,
-    0x59b33d17,0x2eb40d81,0xb7bd5c3b,0xc0ba6cad,
-    0xedb88320,0x9abfb3b6,0x03b6e20c,0x74b1d29a,
-    0xead54739,0x9dd277af,0x04db2615,0x73dc1683,
-    0xe3630b12,0x94643b84,0x0d6d6a3e,0x7a6a5aa8,
-    0xe40ecf0b,0x9309ff9d,0x0a00ae27,0x7d079eb1,
-    0xf00f9344,0x8708a3d2,0x1e01f268,0x6906c2fe,
-    0xf762575d,0x806567cb,0x196c3671,0x6e6b06e7,
-    0xfed41b76,0x89d32be0,0x10da7a5a,0x67dd4acc,
-    0xf9b9df6f,0x8ebeeff9,0x17b7be43,0x60b08ed5,
-    0xd6d6a3e8,0xa1d1937e,0x38d8c2c4,0x4fdff252,
-    0xd1bb67f1,0xa6bc5767,0x3fb506dd,0x48b2364b,
-    0xd80d2bda,0xaf0a1b4c,0x36034af6,0x41047a60,
-    0xdf60efc3,0xa867df55,0x316e8eef,0x4669be79,
-    0xcb61b38c,0xbc66831a,0x256fd2a0,0x5268e236,
-    0xcc0c7795,0xbb0b4703,0x220216b9,0x5505262f,
-    0xc5ba3bbe,0xb2bd0b28,0x2bb45a92,0x5cb36a04,
-    0xc2d7ffa7,0xb5d0cf31,0x2cd99e8b,0x5bdeae1d,
-    0x9b64c2b0,0xec63f226,0x756aa39c,0x026d930a,
-    0x9c0906a9,0xeb0e363f,0x72076785,0x05005713,
-    0x95bf4a82,0xe2b87a14,0x7bb12bae,0x0cb61b38,
-    0x92d28e9b,0xe5d5be0d,0x7cdcefb7,0x0bdbdf21,
-    0x86d3d2d4,0xf1d4e242,0x68ddb3f8,0x1fda836e,
-    0x81be16cd,0xf6b9265b,0x6fb077e1,0x18b74777,
-    0x88085ae6,0xff0f6a70,0x66063bca,0x11010b5c,
-    0x8f659eff,0xf862ae69,0x616bffd3,0x166ccf45,
-    0xa00ae278,0xd70dd2ee,0x4e048354,0x3903b3c2,
-    0xa7672661,0xd06016f7,0x4969474d,0x3e6e77db,
-    0xaed16a4a,0xd9d65adc,0x40df0b66,0x37d83bf0,
-    0xa9bcae53,0xdebb9ec5,0x47b2cf7f,0x30b5ffe9,
-    0xbdbdf21c,0xcabac28a,0x53b39330,0x24b4a3a6,
-    0xbad03605,0xcdd70693,0x54de5729,0x23d967bf,
-    0xb3667a2e,0xc4614ab8,0x5d681b02,0x2a6f2b94,
-    0xb40bbe37,0xc30c8ea1,0x5a05df1b,0x2d02ef8d
-};
-
-static void getCRC(uint32_t *checksum, const uint8_t *inputBytes, size_t length, unsigned long crcTable[]) {
-    for (size_t i = 0; i < length; i += 1) {
-        *checksum = crcTable[inputBytes[i] ^ (*checksum & 0xff)] ^ (*checksum >> 8);
-    }
-}
+#include "crcgenerator.h"
 
 // Rotate right for 16-bit unsigned integers.
 // Used to normalize MSB-aligned high bit-depth samples (10-bit, 12-bit) to LSB-aligned.
@@ -240,7 +167,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         }
 
         if (m_outputcrcPerFrame && m_crcOutputFile) {
-            fprintf(m_crcOutputFile, "CRC Frame[%" PRId64 "]:", pFrame->displayOrder);
+            fprintf(m_crcOutputFile, "CRC Frame[%lld]:", (long long)pFrame->displayOrder);
             for (size_t i = 0; i < m_crcInitValue.size(); i += 1) {
                 uint32_t frameCrc = m_crcInitValue[i];
                 getCRC(&frameCrc, pOutputBuffer, usedBufferSize, Crc32Table);
@@ -265,15 +192,44 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         }
     }
 
-    FILE* AttachFile(const char* fileName) {
+    // Returns true when fileName ends with extension (case-sensitive; pass the dot, e.g. ".y4m").
+    // Null arguments yield false instead of being handed to strlen(), which would be undefined.
+    bool hasExtension(const char* fileName, const char* extension) {
+        if ((fileName == nullptr) || (extension == nullptr)) {
+            return false;
+        }
+        const size_t fileLen = std::strlen(fileName);
+        const size_t extLen = std::strlen(extension);
+        return (fileLen >= extLen) && (std::strcmp(fileName + (fileLen - extLen), extension) == 0);
+    }
+
+    FILE* AttachFile(const char* fileName, bool y4mFormat) {
         if (m_outputFile) {
             fclose(m_outputFile);
             m_outputFile = nullptr;
         }
 
+        std::string fileNameWithModExt;  // owns the adjusted name that fileName may point into below
+        // If the requested output format and the file extension disagree, warn and append the
+        // expected extension. A null fileName is skipped: strlen()/operator<< on nullptr is UB.
+        if ((fileName != nullptr) && y4mFormat && !hasExtension(fileName, ".y4m")) {
+            std::cout << std::endl << "y4m output format is requested, ";
+            std::cout << "but the output file's (" << fileName << ") extension isn't .y4m!"
+                      << std::endl;
+            fileNameWithModExt = fileName + std::string(".y4m");
+            fileName = fileNameWithModExt.c_str();
+        } else if ((fileName != nullptr) && (y4mFormat == false) && !hasExtension(fileName, ".yuv")) {
+            std::cout << std::endl << "Raw yuv output format is requested, ";
+            std::cout << "but the output file's (" << fileName << ") extension isn't .yuv!"
+                      << std::endl;
+            fileNameWithModExt = fileName + std::string(".yuv");
+            fileName = fileNameWithModExt.c_str();
+        }
+
         if (fileName != nullptr) {
             m_outputFile = fopen(fileName, "wb");
             if (m_outputFile) {
+                std::cout << "Output file name is: " << fileName << std::endl;
                 return m_outputFile;
             }
         }
@@ -386,6 +342,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         VkDeviceSize maxSize = 0;
         const uint8_t* readImagePtr = srcImageDeviceMemory->GetReadOnlyDataPtr(imageOffset, maxSize);
         assert(readImagePtr != nullptr);
+        assert(maxSize <= SIZE_MAX);  // Ensure we don't lose data in conversion
 
         int32_t secondaryPlaneWidth = frameWidth;
         int32_t secondaryPlaneHeight = frameHeight;
@@ -461,14 +418,18 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         // Copy the luma plane
         const uint32_t numCompatiblePlanes = 1;
         for (uint32_t plane = 0; plane < numCompatiblePlanes; plane++) {
-            const uint8_t* pSrc = readImagePtr + layouts[plane].offset;
-            uint8_t* pDst = pOutBuffer + yuvPlaneLayouts[plane].offset;
+            const uint8_t* pSrc = readImagePtr + static_cast<size_t>(layouts[plane].offset);
+            uint8_t* pDst = pOutBuffer + static_cast<size_t>(yuvPlaneLayouts[plane].offset);
 
             if (is8Bit) {
-                CopyPlaneData<uint8_t>(pSrc, pDst, layouts[plane].rowPitch, yuvPlaneLayouts[plane].rowPitch,
+                assert(layouts[plane].rowPitch <= SIZE_MAX);
+                assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX);
+                CopyPlaneData<uint8_t>(pSrc, pDst, static_cast<size_t>(layouts[plane].rowPitch), static_cast<size_t>(yuvPlaneLayouts[plane].rowPitch),
                                       frameWidth, imageHeight);
             } else {
-                CopyPlaneData<uint16_t>(pSrc, pDst, layouts[plane].rowPitch, yuvPlaneLayouts[plane].rowPitch,
+                assert(layouts[plane].rowPitch <= SIZE_MAX);
+                assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX);
+                CopyPlaneData<uint16_t>(pSrc, pDst, static_cast<size_t>(layouts[plane].rowPitch), static_cast<size_t>(yuvPlaneLayouts[plane].rowPitch),
                                        frameWidth, imageHeight, 1, bitShift);
             }
         }
@@ -488,10 +449,14 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
                 }
 
                 if (is8Bit) {
-                    CopyPlaneData<uint8_t>(pSrc, pDst, layouts[srcPlane].rowPitch, yuvPlaneLayouts[plane].rowPitch,
+                    assert(layouts[srcPlane].rowPitch <= SIZE_MAX);
+                    assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX);
+                    CopyPlaneData<uint8_t>(pSrc, pDst, static_cast<size_t>(layouts[srcPlane].rowPitch), static_cast<size_t>(yuvPlaneLayouts[plane].rowPitch),
                                            planeWidth, 1, 2);
                 } else {
-                    CopyPlaneData<uint16_t>(pSrc, pDst, layouts[srcPlane].rowPitch, yuvPlaneLayouts[plane].rowPitch,
+                    assert(layouts[srcPlane].rowPitch <= SIZE_MAX);
+                    assert(yuvPlaneLayouts[plane].rowPitch <= SIZE_MAX);
+                    CopyPlaneData<uint16_t>(pSrc, pDst, static_cast<size_t>(layouts[srcPlane].rowPitch), static_cast<size_t>(yuvPlaneLayouts[plane].rowPitch),
                                             planeWidth, 1, 2, bitShift);
                 }
                 pDst += yuvPlaneLayouts[plane].rowPitch;
@@ -499,10 +464,10 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         }
 
         // Calculate total buffer size
-        outputBufferSize = yuvPlaneLayouts[0].rowPitch * imageHeight;
+        outputBufferSize = static_cast<size_t>(yuvPlaneLayouts[0].rowPitch * imageHeight);
         if (mpInfo->planesLayout.numberOfExtraPlanes >= 1) {
-            outputBufferSize += yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight;
-            outputBufferSize += yuvPlaneLayouts[2].rowPitch * secondaryPlaneHeight;
+            outputBufferSize += static_cast<size_t>(yuvPlaneLayouts[1].rowPitch * secondaryPlaneHeight);
+            outputBufferSize += static_cast<size_t>(yuvPlaneLayouts[2].rowPitch * secondaryPlaneHeight);
         }
 
         return outputBufferSize;
@@ -516,6 +481,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
         }
 
         VkDeviceSize imageMemorySize = imageResource->GetImageDeviceMemorySize();
+        assert(imageMemorySize <= SIZE_MAX);  // Ensure we don't lose data in conversion
 
         if ((m_pLinearMemory == nullptr) || (imageMemorySize > m_allocationSize)) {
             if (m_outputFile) {
@@ -527,7 +493,7 @@ class VkVideoFrameToFileImpl : public VkVideoFrameOutput {
                 m_pLinearMemory = nullptr;
             }
 
-            m_allocationSize = (size_t)(imageMemorySize);
+            m_allocationSize = static_cast<size_t>(imageMemorySize);
             m_pLinearMemory = new uint8_t[m_allocationSize];
             if (m_pLinearMemory == nullptr) {
                 return nullptr;
@@ -568,7 +534,7 @@ VkResult VkVideoFrameOutput::Create(const char* fileName,
         return VK_ERROR_OUT_OF_HOST_MEMORY;
     }
 
-    FILE* outFile = newFrameToFile->AttachFile(fileName);
+    FILE* outFile = newFrameToFile->AttachFile(fileName, outputy4m);
     if ((fileName != nullptr) && (outFile == nullptr)) {
         delete newFrameToFile;
         return VK_ERROR_INITIALIZATION_FAILED;
diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp
index dd67b2b5..dcea5b41 100644
--- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp
+++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.cpp
@@ -17,7 +17,7 @@
 #include "VulkanFilterYuvCompute.h"
 #include "nvidia_utils/vulkan/ycbcrvkinfo.h"
 
-static bool dumpShaders = false;
+static bool dumpShaders = true; // NOTE(review): default flipped from false to true in this change; confirm it is intentional and not a debugging leftover
 
 VkResult VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx,
                                         uint32_t queueFamilyIndex,
@@ -26,6 +26,8 @@ VkResult VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx,
                                         uint32_t maxNumFrames,
                                         VkFormat inputFormat,
                                         VkFormat outputFormat,
+                                        bool inputEnableMsbToLsbShift,
+                                        bool outputEnableLsbToMsbShift,
                                         const VkSamplerYcbcrConversionCreateInfo* pYcbcrConversionCreateInfo,
                                         const YcbcrPrimariesConstants* pYcbcrPrimariesConstants,
                                         const VkSamplerCreateInfo* pSamplerCreateInfo,
@@ -39,6 +41,8 @@ VkResult VulkanFilterYuvCompute::Create(const VulkanDeviceContext* vkDevCtx,
                                                                                          maxNumFrames,
                                                                                          inputFormat,
                                                                                          outputFormat,
+                                                                                         inputEnableMsbToLsbShift,
+                                                                                         outputEnableLsbToMsbShift,
                                                                                          pYcbcrPrimariesConstants));
 
     if (!yCbCrVulkanFilter) {
@@ -116,34 +120,58 @@ VkResult VulkanFilterYuvCompute::Init(const VkSamplerYcbcrConversionCreateInfo*
 VkResult VulkanFilterYuvCompute::InitDescriptorSetLayout(uint32_t maxNumFrames)
 {
 
+
     VkSampler ccSampler = m_samplerYcbcrConversion.GetSampler();
-    assert(ccSampler != VK_NULL_HANDLE);
-    VkDescriptorType type = (ccSampler != VK_NULL_HANDLE) ? VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+    VkDescriptorType type = (ccSampler != VK_NULL_HANDLE) ? VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER :
+                                                            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
     const VkSampler* pImmutableSamplers = (ccSampler != VK_NULL_HANDLE) ? &ccSampler : nullptr;
 
-    const std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings{
-        //                        binding,  descriptorType,          descriptorCount, stageFlags, pImmutableSamplers;
+    std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings;
+
+    // Input bindings (either images or buffers)
+    if (m_inputIsBuffer) {
+        // Binding 0: Input buffer (read-only) for single buffer case
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 1: Input buffer (read-only) Y plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 2: Input buffer (read-only) Cb or CbCr plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 3: Input buffer (read-only) Cr plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+    } else {
         // Binding 0: Input image (read-only) RGBA or RGBA YCbCr sampler sampled
-        VkDescriptorSetLayoutBinding{ 0, type,                             1, VK_SHADER_STAGE_COMPUTE_BIT, pImmutableSamplers},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 0, type, 1, VK_SHADER_STAGE_COMPUTE_BIT, pImmutableSamplers});
         // Binding 1: Input image (read-only) Y plane of YCbCr Image
-        VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
         // Binding 2: Input image (read-only) Cb or CbCr plane
-        VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
         // Binding 3: Input image (read-only) Cr plane
-        VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+    }
 
+    // Output bindings (either images or buffers)
+    if (m_outputIsBuffer) {
+        // Binding 4: Output buffer (write) for single buffer case
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 5: Output buffer (write) Y plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 6: Output buffer (write) CbCr plane of 2-plane or Cb of 3-plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+        // Binding 7: Output buffer (write) Cr plane of 3-plane
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+    } else {
         // Binding 4: Output image (write) RGBA or YCbCr single-plane image
-        VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
         // Binding 5: Output image (write) Y plane of YCbCr Image
-        VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
         // Binding 6: Output image (write) CbCr plane of 2-plane or Cb of 3-plane YCbCr Image
-        VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
         // Binding 7: Output image (write) Cr plane of 3-pane YCbCr Image
-        VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+        setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
+    }
 
-        // Binding 8: uniform buffer for input parameters.
-        VkDescriptorSetLayoutBinding{ 8, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
-    };
+    // Binding 8: uniform buffer for input parameters.
+    setLayoutBindings.push_back(VkDescriptorSetLayoutBinding{ 8, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr});
 
     VkPushConstantRange pushConstantRange = {};
     pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; // Stage the push constant is for
@@ -175,20 +203,74 @@ static YcbcrBtStandard GetYcbcrPrimariesConstantsId(VkSamplerYcbcrModelConversio
     return YcbcrBtStandardUnknown;
 }
 
+/**
+ * @brief Writes the GLSL push-constant block used by the generated compute shaders.
+ *
+ * The block, instanced as `pushConstants`, holds 18 uint members: source and
+ * destination layer, input and output width/height, and per-plane (Y, Cb, Cr)
+ * offsets and pitches for the input and output buffers.
+ *
+ * @param shaderStr Stream the GLSL declaration is appended to
+ */
+static void GenPushConstantsDecl(std::stringstream& shaderStr) {
+    static const char kPushConstantsDecl[] =
+        "layout(push_constant) uniform PushConstants {\n"
+        "    uint srcLayer;        // src image layer to use\n"
+        "    uint dstLayer;        // dst image layer to use\n"
+        "    uint inputWidth;      // input image or buffer width\n"
+        "    uint inputHeight;     // input image or buffer height\n"
+        "    uint outputWidth;     // output image or buffer width\n"
+        "    uint outputHeight;    // output image or buffer height\n"
+        "    uint inYOffset;       // input  buffer Y plane offset\n"
+        "    uint inCbOffset;      // input  buffer Cb plane offset\n"
+        "    uint inCrOffset;      // input  buffer Cr plane offset\n"
+        "    uint inYPitch;        // input  buffer Y plane pitch\n"
+        "    uint inCbPitch;       // input  buffer Cb plane pitch\n"
+        "    uint inCrPitch;       // input  buffer Cr plane pitch\n"
+        "    uint outYOffset;      // output buffer Y plane offset\n"
+        "    uint outCbOffset;     // output buffer Cb plane offset\n"
+        "    uint outCrOffset;     // output buffer Cr plane offset\n"
+        "    uint outYPitch;       // output buffer Y plane pitch\n"
+        "    uint outCbPitch;      // output buffer Cb plane pitch\n"
+        "    uint outCrPitch;      // output buffer Cr plane pitch\n"
+        "} pushConstants;\n";
+    shaderStr << kPushConstantsDecl;
+}
+
+// Updated header function with unified push constants
+/**
+ * @brief Generates the shader header with version declaration and push constants
+ *
+ * Creates the beginning of a GLSL compute shader with:
+ * - GLSL version declaration (#version 450)
+ * - Push constants structure
+ * - Local work group size (16x16)
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ */
 static void GenHeaderAndPushConst(std::stringstream& shaderStr)
 {
-    shaderStr << "#version 450\n"
-                 "layout(push_constant) uniform PushConstants {\n"
-                 "    uint srcImageLayer;  // Source image layer index\n"
-                 "    uint dstImageLayer;  // Destination image layer index\n"
-                 "    ivec2 inputSize;     // Original input image size (width, height)\n"
-                 "    ivec2 outputSize;    // Output image size (width, height, with padding)\n"
-                 "} pushConstants;\n"
-                 "\n"
-                 "layout (local_size_x = 16, local_size_y = 16) in;\n"
-                 "\n";
+    shaderStr << "#version 450\n";
+    GenPushConstantsDecl(shaderStr);
+    shaderStr << "\n"
+              << "layout (local_size_x = 16, local_size_y = 16) in;\n"
+              << "\n";
 }
 
+/**
+ * @brief Generates GLSL code for image binding layout declarations
+ *
+ * Creates the binding declaration for an image resource in the shader.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param imageName Base name for the image variable
+ * @param imageSubName Suffix name for the image variable (e.g., "Y", "CbCr")
+ * @param imageFormat Format string for the image (e.g., "rgba8")
+ * @param isInput Whether this is an input (readonly) or output (writeonly) image
+ * @param binding Binding point in the descriptor set
+ * @param set Descriptor set number
+ * @param imageArray Whether the image should be declared as image2DArray instead of image2D
+ */
 static void GenImageIoBindingLayout(std::stringstream& shaderStr,
                                     const char *imageName,
                                     const char *imageSubName,
@@ -206,22 +288,249 @@ static void GenImageIoBindingLayout(std::stringstream& shaderStr,
 
 }
 
+/**
+ * @brief Generates GLSL code for handling global invocation position and bounds checking
+ *
+ * The emitted statements:
+ * - declare `ivec2 pos` from gl_GlobalInvocationID.xy
+ * - compare pos against pushConstants.outputWidth / pushConstants.outputHeight
+ * - return from the shader when pos.x or pos.y is at or beyond those limits
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ */
 static void GenHandleImagePosition(std::stringstream& shaderStr)
 {
     shaderStr <<
     "    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);\n"
     "    // Check for out-of-bounds writes\n"
-    "    if ((pos.x >= pushConstants.outputSize.x) || (pos.y >= pushConstants.outputSize.y)) {\n"
+    "    if ((pos.x >= pushConstants.outputWidth) || (pos.y >= pushConstants.outputHeight)) {\n"
+    "        return;\n"
+    "    }\n"
+    "\n";
+}
+
+/**
+ * @brief Generates GLSL code for buffer binding layout declarations
+ *
+ * Emits one declaration named bufferName + bufferSubName; unsupported descriptor types emit nothing.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param bufferName Base name for the buffer variable
+ * @param bufferSubName Suffix name for the buffer variable (e.g., "Y", "CbCr")
+ * @param bufferDataType Element type of the data[] array (e.g., "uint8_t"); used only for storage buffers
+ * @param bufferType Descriptor type: uniform texel, storage texel, or (dynamic) storage buffer
+ * @param isInput true emits " readonly", false emits " writeonly"; ignored for uniform texel buffers
+ * @param binding Binding point in the descriptor set
+ * @param set Descriptor set number
+ */
+static void GenBufferIoBindingLayout(std::stringstream& shaderStr,
+                                     const char *bufferName,
+                                     const char *bufferSubName,
+                                     const char *bufferDataType,
+                                     VkDescriptorType bufferType,
+                                     bool isInput,
+                                     uint32_t binding,
+                                     uint32_t set) {
+
+    const char* readonlyModifier = isInput ? " readonly" : "";
+    const char* writeonlyModifier = isInput ? "" : " writeonly";
+
+    switch (bufferType) {
+        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+            shaderStr << "layout (set = " << set << ", binding = " << binding << ") uniform"
+                      << " samplerBuffer "
+                      << bufferName << bufferSubName
+                      << ";\n";
+            break;
+
+        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: // NOTE(review): no format layout qualifier is emitted - confirm readonly loads compile
+            shaderStr << "layout (set = " << set << ", binding = " << binding << ") uniform"
+                      << readonlyModifier << writeonlyModifier
+                      << " imageBuffer "
+                      << bufferName << bufferSubName
+                      << ";\n";
+            break;
+
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+            shaderStr << "layout (set = " << set << ", binding = " << binding << ") buffer"
+                      << readonlyModifier << writeonlyModifier
+                      << " " << bufferName << bufferSubName << "Buffer"
+                      << " {\n"
+                      << "    " << bufferDataType << "[] data;\n"
+                      << "} " << bufferName << bufferSubName << ";\n";
+            break;
+
+        default:
+            // Unsupported descriptor type: no declaration is written for this binding
+            break;
+    }
+}
+
+/**
+ * @brief Generates GLSL code for determining if a position has chroma information
+ *
+ * Builds a test that is true when the pixel position is a multiple of the
+ * subsampling ratio on each subsampled axis. For example, in 4:2:0
+ * subsampling, only pixels at even x and y coordinates have chroma samples.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param chromaHorzRatio Horizontal subsampling ratio (1 for 4:4:4, 2 for 4:2:2/4:2:0)
+ * @param chromaVertRatio Vertical subsampling ratio (1 for 4:4:4/4:2:2, 2 for 4:2:0)
+ * @param useCondition true emits "bool <setProcessChromaBool> = <test>;", false emits only the bare <test> expression
+ * @param pixelPosName Name of the pixel position variable in the shader (default: "srcPos")
+ * @param setProcessChromaBool Name of the bool declared when useCondition is true (default: "processChromaBool")
+ */
+static void GenHandleChromaPosition(std::stringstream& shaderStr,
+                                uint32_t chromaHorzRatio,
+                                uint32_t chromaVertRatio,
+                                bool useCondition = true,
+                                const char* pixelPosName = "srcPos",
+                                const char* setProcessChromaBool = "processChromaBool")
+{
+    // Neither axis is subsampled (both ratios <= 1): the test is the constant true
+    if (chromaHorzRatio <= 1 && chromaVertRatio <= 1) {
+        if (useCondition) {
+            // Declaration form: the bool is initialized to true
+            shaderStr << "    bool " << setProcessChromaBool << " = true;\n";
+        } else {
+            shaderStr << "true";
+        }
+        return;
+    }
+
+    // Build the modulo test, one term per subsampled axis, joined with && when both apply
+    std::stringstream condition;
+    if (chromaHorzRatio > 1)
+        condition << "(" << pixelPosName << ".x % " << chromaHorzRatio << " == 0)";
+
+    if (chromaHorzRatio > 1 && chromaVertRatio > 1)
+        condition << " && ";
+
+    if (chromaVertRatio > 1)
+        condition << "(" << pixelPosName << ".y % " << chromaVertRatio << " == 0)";
+
+    if (useCondition) {
+        shaderStr << "    bool " << setProcessChromaBool << " = " << condition.str() << ";\n";
+    } else {
+        shaderStr << condition.str();
+    }
+}
+
+/**
+ * @brief Generates GLSL code for calculating subsampled chroma positions
+ *
+ * Emits statements that assign dstPosName from srcPosName, dividing x and/or y
+ * by the subsampling ratio on each subsampled axis (both by 2 for 4:2:0).
+ * dstPosName is only assigned, never declared, by the emitted code.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param chromaHorzRatio Horizontal subsampling ratio (1 for 4:4:4, 2 for 4:2:2/4:2:0)
+ * @param chromaVertRatio Vertical subsampling ratio (1 for 4:4:4/4:2:2, 2 for 4:2:0)
+ * @param srcPosName Name of the source position variable (default: "srcPos")
+ * @param dstPosName Name of the destination position variable (default: "chromaSrcPos")
+ * @param indent Number of spaces to indent the output code (default: 8)
+ * @param generateIfBlock true wraps the division in "if (processChroma)"; with 4:4:4 it then emits comments only (default: false)
+ */
+static void GenCalculateChromaPosition(std::stringstream& shaderStr,
+                                     uint32_t chromaHorzRatio,
+                                     uint32_t chromaVertRatio,
+                                     const char* srcPosName = "srcPos",
+                                     const char* dstPosName = "chromaSrcPos",
+                                     int indent = 8,
+                                     bool generateIfBlock = false)
+{
+    std::string indentStr(indent, ' ');
+
+    // For 4:4:4, no subsampling needed
+    if (chromaHorzRatio <= 1 && chromaVertRatio <= 1) {
+        shaderStr << indentStr << "// No subsampling for 4:4:4 format, use original position\n";
+        if (generateIfBlock) { // NOTE(review): no assignment is emitted here - verify callers pre-assign dstPosName
+            shaderStr << indentStr << "// " << dstPosName << " already equals " << srcPosName << "\n";
+        } else {
+            shaderStr << indentStr << dstPosName << " = " << srcPosName << ";\n";
+        }
+        return;
+    }
+
+    shaderStr << indentStr << "// Calculate subsampled positions based on format's subsampling\n";
+
+    if (generateIfBlock) {
+        // Start from the unscaled position, then divide inside an if-block guarded by the GLSL name processChroma
+        shaderStr << indentStr << dstPosName << " = " << srcPosName << ";\n";
+        shaderStr << indentStr << "if (processChroma) {\n";
+
+        if (chromaHorzRatio > 1) {
+            shaderStr << indentStr << "    " << dstPosName << ".x = " << srcPosName << ".x / " << chromaHorzRatio << ";\n";
+        }
+
+        if (chromaVertRatio > 1) {
+            shaderStr << indentStr << "    " << dstPosName << ".y = " << srcPosName << ".y / " << chromaVertRatio << ";\n";
+        }
+
+        shaderStr << indentStr << "}\n";
+    } else {
+        // Single ivec2(...) assignment; an axis with ratio <= 1 is passed through undivided
+        shaderStr << indentStr << dstPosName << " = ivec2(";
+
+        if (chromaHorzRatio > 1)
+            shaderStr << srcPosName << ".x / " << chromaHorzRatio;
+        else
+            shaderStr << srcPosName << ".x";
+
+        shaderStr << ", ";
+
+        if (chromaVertRatio > 1)
+            shaderStr << srcPosName << ".y / " << chromaVertRatio;
+        else
+            shaderStr << srcPosName << ".y";
+
+        shaderStr << ");\n";
+    }
+}
+
+/**
+ * @brief Generates GLSL code for handling buffer position calculations with chroma subsampling
+ *
+ * The emitted statements:
+ * - declare `ivec2 pos` from gl_GlobalInvocationID.xy
+ * - return when pos is outside pushConstants.outputWidth / outputHeight
+ * - declare yIndex, cbIndex and crIndex from the in*Offset / in*Pitch push constants
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param chromaHorzRatio Divisor applied to pos.x for cbIndex/crIndex (default: 2 for 4:2:0/4:2:2)
+ * @param chromaVertRatio Divisor applied to pos.y for cbIndex/crIndex (default: 2 for 4:2:0)
+ */
+static void GenHandleBufferPosition(std::stringstream& shaderStr, int chromaHorzRatio = 2, int chromaVertRatio = 2)
+{
+    shaderStr <<
+    "    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);\n"
+    "    // Check for out-of-bounds writes\n"
+    "    if ((pos.x >= pushConstants.outputWidth) || (pos.y >= pushConstants.outputHeight)) {\n"
     "        return;\n"
     "    }\n"
+    "    \n"
+    "    // Calculate buffer indices based on position and strides\n"
+    "    uint yIndex = pushConstants.inYOffset + pos.y * pushConstants.inYPitch + pos.x;\n"
+    "    uint cbIndex = pushConstants.inCbOffset + (pos.y / " << chromaVertRatio << ") * pushConstants.inCbPitch + (pos.x / " << chromaHorzRatio << ");\n"
+    "    uint crIndex = pushConstants.inCrOffset + (pos.y / " << chromaVertRatio << ") * pushConstants.inCrPitch + (pos.x / " << chromaHorzRatio << ");\n"
     "\n";
 }
 
+/**
+ * @brief Generates GLSL code for handling source position with optional replication
+ *
+ * Emits the declaration of `ivec2 srcPos`. With replication enabled, pos is clamped to the
+ * last valid texel (inputWidth - 1, inputHeight - 1) so positions past the input reuse the edge pixel.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param enableReplicate Whether to enable edge replication (clamp to edge)
+ */
 static void GenHandleSourcePositionWithReplicate(std::stringstream& shaderStr, bool enableReplicate)
 {
     if (enableReplicate) {
         shaderStr <<
-        "    ivec2 srcPos = min(pos, pushConstants.inputSize );\n"
+        "    ivec2 srcPos = min(pos, ivec2(pushConstants.inputWidth, pushConstants.inputHeight) - ivec2(1));\n"
         "\n";
     } else {
         shaderStr <<
@@ -230,15 +539,622 @@ static void GenHandleSourcePositionWithReplicate(std::stringstream& shaderStr, b
     }
 }
 
-void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr,
-                                                            VkImageAspectFlags& imageAspects,
-                                                            const char *imageName,
-                                                            VkFormat    imageFormat,
-                                                            bool isInput,
-                                                            uint32_t startBinding,
-                                                            uint32_t set,
-                                                            bool imageArray)
+/**
+ * @brief Generates GLSL function for fetching Y samples from a buffer
+ *
+ * Emits fetchYFromBuffer(uint index), which reads inputBufferY.data[index] and returns
+ * byte / 255.0 for 8-bit data or the result of extractHighBitDepth() otherwise.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isHighBitDepth Whether the Y data is high bit depth (>8 bits)
+ * @param bitDepth The bit depth of Y samples (8, 10, 12, or 16); not used by this generator
+ */
+static void GenFetchYFromBufferFunc(std::stringstream& shaderStr,
+                                    bool isHighBitDepth, uint32_t bitDepth)
 {
+    shaderStr << "// Function to fetch Y component from buffer\n"
+              << "float fetchYFromBuffer(uint index) {\n";
+
+    if (isHighBitDepth) {
+        shaderStr << "    uint16_t rawValue = inputBufferY.data[index];\n"
+                  << "    return extractHighBitDepth(rawValue);\n";
+    } else {
+        shaderStr << "    uint8_t byteValue = inputBufferY.data[index];\n"
+                  << "    return float(byteValue) / 255.0;\n";
+    }
+
+    shaderStr << "}\n\n";
+}
+
+/**
+ * @brief Generates GLSL functions for fetching Cb and Cr samples from buffers
+ *
+ * Emits fetchCbFromBuffer(uint) and fetchCrFromBuffer(uint), reading inputBufferCb.data / inputBufferCr.data
+ * and returning byte / 255.0 for 8-bit data or the result of extractHighBitDepth() otherwise.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isHighBitDepth Whether the chroma data is high bit depth (>8 bits)
+ * @param bitDepth The bit depth of chroma samples (8, 10, 12, or 16); not used by this generator
+ */
+static void GenFetchCbCrFromBufferFunc(std::stringstream& shaderStr,
+                                       bool isHighBitDepth, uint32_t bitDepth) {
+    // Cb fetch function
+    shaderStr << "// Function to fetch Cb component from buffer\n"
+              << "float fetchCbFromBuffer(uint index) {\n";
+
+    if (isHighBitDepth) {
+        shaderStr << "    uint16_t rawValue = inputBufferCb.data[index];\n"
+                  << "    return extractHighBitDepth(rawValue);\n";
+    } else {
+        shaderStr << "    uint8_t byteValue = inputBufferCb.data[index];\n"
+                  << "    return float(byteValue) / 255.0;\n";
+    }
+
+    shaderStr << "}\n\n";
+
+    // Cr fetch function
+    shaderStr << "// Function to fetch Cr component from buffer\n"
+              << "float fetchCrFromBuffer(uint index) {\n";
+
+    if (isHighBitDepth) {
+        shaderStr << "    uint16_t rawValue = inputBufferCr.data[index];\n"
+                  << "    return extractHighBitDepth(rawValue);\n";
+    } else {
+        shaderStr << "    uint8_t byteValue = inputBufferCr.data[index];\n"
+                  << "    return float(byteValue) / 255.0;\n";
+    }
+
+    shaderStr << "}\n\n";
+}
+
+/**
+ * @brief Generates GLSL function for extracting and normalizing high bit-depth values
+ *
+ * Emits float extractHighBitDepth(uint value): MSB-aligned data is shifted right by
+ * (16 - bitDepth), LSB-aligned data is masked with 2^bitDepth - 1; both divide by 2^bitDepth - 1.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isMSB Whether the high bits are MSB-aligned (true) or LSB-aligned (false)
+ * @param bitDepth The bit depth of the samples (10, 12, or 16)
+ */
+static void GenExtractHighBitDepthFunc(std::stringstream& shaderStr,
+                                       bool isMSB, uint32_t bitDepth)
+{
+    shaderStr << "// Helper function to extract and normalize high bit-depth values\n";
+
+    if (isMSB) {
+        // For MSB-aligned data
+        shaderStr << "float extractHighBitDepth(uint value) {\n"
+                  << "    // For MSB-aligned " << bitDepth << "-bit data, shift right to extract the bits\n"
+                  << "    uint extractedValue = value >> (16u - " << bitDepth << "u);\n"
+                  << "    // Normalize to 0.0-1.0 range\n"
+                  << "    return float(extractedValue) / " << ((1 << bitDepth) - 1) << ".0;\n"
+                  << "}\n\n";
+    } else {
+        // For LSB-aligned data
+        shaderStr << "float extractHighBitDepth(uint value) {\n"
+                  << "    // For LSB-aligned " << bitDepth << "-bit data, mask to extract the bits\n"
+                  << "    uint extractedValue = value & " << ((1 << bitDepth) - 1) << "u;\n"
+                  << "    // Normalize to 0.0-1.0 range\n"
+                  << "    return float(extractedValue) / " << ((1 << bitDepth) - 1) << ".0;\n"
+                  << "}\n\n";
+    }
+}
+
+/**
+ * @brief Generates GLSL code for applying MSB-to-LSB bit shifting for high bit-depth content
+ *
+ * Emits statements that rescale YCbCrRawOut; nothing is emitted unless inputBitDepth is 10 or 12:
+ * - For images: multiply the selected components by 1 / 2^(16 - inputBitDepth)
+ * - For buffers: floor-divide the selected components by 2^(16 - inputBitDepth), then normalize
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written; its float format flags are left untouched
+ * @param isInputBuffer Whether the input is a buffer (true) or image (false)
+ * @param inputBitDepth The bit depth of the input data (8, 10, 12, or 16)
+ * @param imageAspects Image aspect flags indicating which planes are being processed
+ */
+static void GenApplyMsbToLsbShift(std::stringstream& shaderStr,
+                                 bool isInputBuffer,
+                                 uint32_t inputBitDepth,
+                                 VkImageAspectFlags imageAspects)
+{
+    // Only apply for high bit-depth formats (10/12-bit)
+    if ((inputBitDepth != 10) && (inputBitDepth != 12)) {
+        return;
+    }
+
+    // Shift amount and its power-of-two factor; both integral so streaming them never depends on float format flags
+    const uint32_t shiftAmount = 16 - inputBitDepth;
+    const uint32_t shiftFactor = 1u << shiftAmount;
+
+    shaderStr << "\n    // MSB-to-LSB shift for high bit-depth "
+              << (isInputBuffer ? "buffer" : "image") << " data\n";
+
+    if (isInputBuffer) {
+        // For buffers, we use actual bit shifting operations on integer values
+        shaderStr << "    // For high bit-depth data in buffers, we need to shift right by "
+                  << shiftAmount << " bits to convert from MSB-aligned to actual values\n"
+                  << "    // This is a right shift operation for integer values\n";
+
+        // Build a condition mask based on which components are being read
+        std::string maskCondition = "";
+        bool needsOr = false;
+
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+            maskCondition += "YCbCrRawOut.x > 0.0";
+            needsOr = true;
+        }
+
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+            if (needsOr) maskCondition += " || ";
+            maskCondition += "YCbCrRawOut.y > 0.0";
+            needsOr = true;
+        }
+
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+            if (needsOr) maskCondition += " || ";
+            maskCondition += "YCbCrRawOut.z > 0.0";
+        }
+
+        // Only apply shift if there are values to shift
+        if (!maskCondition.empty()) {
+            shaderStr << "    if (" << maskCondition << ") {\n"
+                      << "        // Convert from uint values to normalized float (for buffer inputs)\n";
+
+            if (inputBitDepth == 10) {
+                shaderStr << "        // For 10-bit: Convert 10-bit values [0-1023] to normalized [0-1]\n"
+                          << "        const float normFactor = 1.0 / 1023.0;\n";
+            } else { // 12-bit
+                shaderStr << "        // For 12-bit: Convert 12-bit values [0-4095] to normalized [0-1]\n"
+                          << "        const float normFactor = 1.0 / 4095.0;\n";
+            }
+
+            // The emitted GLSL floor-divides by shiftFactor, which mirrors a right shift:
+            // For 10-bit: value / 64 (rounded down)
+            // For 12-bit: value / 16 (rounded down)
+            shaderStr << "        // Apply right shift to convert from MSB-aligned to actual bit values\n";
+
+            // Apply component-specific shifting based on which aspects are being read
+            if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+                shaderStr << "        YCbCrRawOut.x = floor(YCbCrRawOut.x / " << shiftFactor
+                          << ".0) * normFactor;\n";
+            }
+
+            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                shaderStr << "        YCbCrRawOut.y = floor(YCbCrRawOut.y / " << shiftFactor
+                          << ".0) * normFactor;\n";
+            }
+
+            if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                shaderStr << "        YCbCrRawOut.z = floor(YCbCrRawOut.z / " << shiftFactor
+                          << ".0) * normFactor;\n";
+            }
+
+            shaderStr << "    }\n";
+        }
+    } else {
+        // For images, we're already working with normalized values, so we divide by shiftFactor
+        shaderStr << "    // For high bit-depth data in images that are MSB-aligned,\n"
+                  << "    // we need to divide by " << shiftFactor << " to get the proper normalized values\n";
+
+        // Build a shift mask based on which components are being read
+        std::string shiftMask = "vec3(";
+        shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) ? "1.0, " : "0.0, ";
+        shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) ? "1.0, " : "0.0, ";
+        shiftMask += (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) ? "1.0"   : "0.0";
+        shiftMask += ")";
+
+        // Format the reciprocal in a scratch stream: std::fixed / std::setprecision would otherwise stay set on shaderStr
+        std::stringstream shiftFactorRecip;
+        shiftFactorRecip << std::fixed << std::setprecision(8) << (1.0f / static_cast<float>(shiftFactor));
+        // Only apply shift to the components that were actually read
+        shaderStr << "    // Apply multiplication by reciprocal instead of division (more efficient)\n"
+                  << "    const float shiftFactorRecip = " << shiftFactorRecip.str() << "f;\n"
+                  << "    YCbCrRawOut = YCbCrRawOut * shiftFactorRecip * " << shiftMask << " + \n"
+                  << "                  YCbCrRawOut * (vec3(1.0) - " << shiftMask << ");\n";
+    }
+}
+
+/**
+ * @brief Generates the GLSL function readYCbCrFromSource() for buffer or image inputs
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isInputBuffer Whether the input is a buffer (true) or image (false)
+ * @param inputBitDepth Bit depth (8, 10, 12, or 16) selecting the initial black value; other depths use the 8-bit value
+ * @param isInputTwoPlane Whether the input has two planes (e.g., NV12) or three planes
+ * @param enableMsbToLsbShift When true, GenApplyMsbToLsbShift() output is appended before the return statement
+ * @param imageAspects Planes to read; components of planes that are not read keep the initial black value
+ * @param useProcessChromaBool Not used by this generator; the emitted function tests its own processChroma parameter
+ */
+static void GenReadYCbCrBuffer(std::stringstream& shaderStr,
+                               bool isInputBuffer,
+                               uint32_t inputBitDepth,
+                               bool isInputTwoPlane,
+                               bool enableMsbToLsbShift = false,
+                               VkImageAspectFlags imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT |
+                                                                 VK_IMAGE_ASPECT_PLANE_1_BIT |
+                                                                 VK_IMAGE_ASPECT_PLANE_2_BIT,
+                               const char* useProcessChromaBool = "processChroma")
+{
+    // Generate function to read from either buffer or image
+    shaderStr <<
+        "// Function to read YCbCr data from input source (buffer or image)\n"
+        "vec3 readYCbCrFromSource(ivec2 pos, ivec2 chromaPos, uint srcLayer, bool processChroma) {\n"
+        "    // Initialize to YCbCr black values (for limited range)\n";
+
+    // Set appropriate black values based on bit depth
+    if (inputBitDepth == 8) {
+        shaderStr << "    vec3 YCbCrRawOut = vec3(16.0/255.0, 128.0/255.0, 128.0/255.0);\n\n";
+    } else if (inputBitDepth == 10) {
+        shaderStr << "    vec3 YCbCrRawOut = vec3(64.0/1023.0, 512.0/1023.0, 512.0/1023.0);\n\n";
+    } else if (inputBitDepth == 12) {
+        shaderStr << "    vec3 YCbCrRawOut = vec3(256.0/4095.0, 2048.0/4095.0, 2048.0/4095.0);\n\n";
+    } else if (inputBitDepth == 16) {
+        shaderStr << "    vec3 YCbCrRawOut = vec3(4096.0/65535.0, 32768.0/65535.0, 32768.0/65535.0);\n\n";
+    } else {
+        // Default fallback
+        shaderStr << "    vec3 YCbCrRawOut = vec3(16.0/255.0, 128.0/255.0, 128.0/255.0);\n\n";
+    }
+
+    if (isInputBuffer) {
+        // Reading from buffer
+        shaderStr << "    // Reading from buffer source\n";
+
+        // Read Y component if PLANE_0_BIT is set
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+            shaderStr <<
+                "    // Calculate buffer index for Y plane\n"
+                "    uint yIndex = pushConstants.inYOffset + pos.y * pushConstants.inYPitch + pos.x;\n"
+                "    YCbCrRawOut.x = fetchYFromBuffer(yIndex);\n\n";
+        }
+
+        // Read Cb/Cr components based on plane format and aspect flags
+        if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) {
+            // Add conditional check for chroma processing
+            shaderStr << "    // Process chroma data conditionally\n"
+                      << "    if (processChroma) {\n";
+
+            if (isInputTwoPlane) {
+                // Two-plane input buffer format with interleaved CbCr
+                shaderStr << "        // Read interleaved CbCr data from 2-plane input buffer\n"
+                          << "        uint cbcrIndex = pushConstants.inCbOffset + chromaPos.y * pushConstants.inCbPitch + chromaPos.x * 2;\n";
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    shaderStr << "        YCbCrRawOut.y = fetchCbFromBuffer(cbcrIndex);\n"
+                              << "        YCbCrRawOut.z = fetchCrFromBuffer(cbcrIndex + 1);\n";
+                }
+            } else {
+                // Three-plane input buffer format with separate Cb and Cr planes
+                shaderStr << "        // Read separate Cb and Cr from 3-plane input buffer\n";
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    shaderStr << "        uint cbIndex = pushConstants.inCbOffset + chromaPos.y * pushConstants.inCbPitch + chromaPos.x;\n"
+                              << "        YCbCrRawOut.y = fetchCbFromBuffer(cbIndex);\n";
+                }
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                    shaderStr << "        uint crIndex = pushConstants.inCrOffset + chromaPos.y * pushConstants.inCrPitch + chromaPos.x;\n"
+                              << "        YCbCrRawOut.z = fetchCrFromBuffer(crIndex);\n";
+                }
+            }
+
+            // Close the conditional block
+            shaderStr << "    }\n";
+        }
+    } else {
+        // Reading from image
+        shaderStr << "    // Reading from image source\n";
+
+        // Read Y component if PLANE_0_BIT is set
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+            shaderStr << "    // Read Y value from Y plane\n"
+                      << "    YCbCrRawOut.x = imageLoad(inputImageY, ivec3(pos, srcLayer)).r;\n\n";
+        }
+
+        // Read Cb/Cr components based on plane format and aspect flags
+        if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) {
+            // Add conditional check for chroma processing
+            shaderStr << "    // Process chroma data conditionally\n"
+                      << "    if (processChroma) {\n";
+
+            if (isInputTwoPlane) {
+                // Two-plane input image format with interleaved CbCr
+                shaderStr << "        // Read interleaved CbCr data from 2-plane input image\n";
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    // For two-plane formats (NV12, etc.), both Cb and Cr are in the second plane
+                    shaderStr << "        YCbCrRawOut.yz = imageLoad(inputImageCbCr, ivec3(chromaPos, srcLayer)).rg;\n";
+                }
+            } else {
+                // Three-plane input image format with separate Cb and Cr planes
+                shaderStr << "        // Read separate Cb and Cr from 3-plane input image\n";
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    shaderStr << "        YCbCrRawOut.y = imageLoad(inputImageCb, ivec3(chromaPos, srcLayer)).r; // Cb\n";
+                }
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                    shaderStr << "        YCbCrRawOut.z = imageLoad(inputImageCr, ivec3(chromaPos, srcLayer)).r; // Cr\n";
+                }
+            }
+
+            // Close the conditional block
+            shaderStr << "    }\n";
+        }
+    }
+
+    // Apply MSB-to-LSB shift if enabled
+    if (enableMsbToLsbShift) {
+        GenApplyMsbToLsbShift(shaderStr, isInputBuffer, inputBitDepth, imageAspects);
+    }
+
+    // Return the raw YCbCr values
+    shaderStr <<
+        "\n    return YCbCrRawOut;\n"
+        "}\n\n";
+}
+
+/**
+ * @brief Generates GLSL statements for applying LSB-to-MSB bit shifting for high bit-depth content
+ *
+ * Emits statements (not a whole function) that act on a variable named YCbCrRawIn.
+ * Only 10/12-bit image output is rescaled (multiplied by 2^(16 - outputBitDepth));
+ * every other combination emits just an explanatory GLSL comment.
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isOutputBuffer Whether the output is a buffer (true) or image (false)
+ * @param outputBitDepth The bit depth of the output data (8, 10, 12, or 16)
+ */
+static void GenApplyLsbToMsbShift(std::stringstream& shaderStr,
+                                  bool isOutputBuffer,
+                                  uint32_t outputBitDepth)
+{
+    // Only apply for high bit-depth formats (10/12-bit)
+    if ((outputBitDepth != 10) && (outputBitDepth != 12)) {
+        // For 8-bit or 16-bit, no shift is needed - just use the input values directly
+        shaderStr << "    // No bit-depth shift needed for " << outputBitDepth << "-bit format\n\n";
+        return;
+    }
+
+    // Calculate shift amount based on bit depth
+    uint32_t shiftAmount = 16 - outputBitDepth;
+    float shiftFactor = static_cast<float>(1 << shiftAmount);
+
+    shaderStr << "    // Apply LSB-to-MSB shift for high bit-depth "
+              << (isOutputBuffer ? "buffer" : "image") << " data\n";
+
+    if (isOutputBuffer) {
+        // For buffers, we'll return unshifted values because the packing functions
+        // handle the bit shifting during the actual write operation
+        shaderStr << "    // For buffer output, shift will be applied during packing\n\n";
+    } else {
+        // For images, we need to multiply by shift factor to align bits properly
+        // The factor is streamed as a float using shaderStr's current formatting state
+        shaderStr << "    // For image output with " << outputBitDepth << "-bit, multiply by " << shiftFactor
+                  << " to shift into the MSB\n"
+                  << "    const float shiftFactorMultiplier = " << shiftFactor << ";\n"
+                  << "    YCbCrRawIn = YCbCrRawIn * shiftFactorMultiplier;\n\n";
+    }
+}
+
+/**
+ * @brief Emits the GLSL function writeYCbCrToDestination() for a buffer or image destination
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param isOutputBuffer Whether the output is a buffer (true) or image (false)
+ * @param outputBitDepth The bit depth of the output data (8, 10, 12, or 16)
+ * @param isOutputTwoPlane Whether the output format has two planes (e.g., NV12) or three planes
+ * @param enableLsbToMsbShift When true, GenApplyLsbToMsbShift() output is emitted before the writes
+ * @param imageAspects Plane aspect bits selecting which of the Y / Cb / Cr writes are emitted
+ * @param useProcessChromaBool NOTE(review): not referenced here; the GLSL always tests processChroma
+ */
+static void GenWriteYCbCrBuffer(std::stringstream& shaderStr,
+                                bool isOutputBuffer,
+                                uint32_t outputBitDepth,
+                                bool isOutputTwoPlane,
+                                bool enableLsbToMsbShift = false,
+                                VkImageAspectFlags imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT |
+                                                                  VK_IMAGE_ASPECT_PLANE_1_BIT |
+                                                                  VK_IMAGE_ASPECT_PLANE_2_BIT,
+                                const char* useProcessChromaBool = "processChroma")
+{
+    // Open the GLSL function; the signature is the same for buffer and image outputs
+    shaderStr <<
+        "// Function to write YCbCr data to output destination (buffer or image)\n"
+        "void writeYCbCrToDestination(vec3 YCbCrRawIn, ivec2 pos, ivec2 chromaPos, uint dstLayer, bool processChroma) {\n";
+
+    // Optional LSB-to-MSB shift: rescales YCbCrRawIn only for 10/12-bit image output, comments otherwise
+    if (enableLsbToMsbShift) {
+        GenApplyLsbToMsbShift(shaderStr, isOutputBuffer, outputBitDepth);
+    }
+
+    if (isOutputBuffer) {
+        // Writing to buffer
+        shaderStr <<
+            "    // Writing to buffer destination\n";
+
+        // Write Y component if PLANE_0_BIT is set
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+            shaderStr <<
+                "    // Calculate buffer index for Y plane\n"
+                "    uint outYIndex = pushConstants.outYOffset + pos.y * pushConstants.outYPitch + pos.x;\n\n";
+
+            // Pick the Y store expression from the output bit depth
+            if (outputBitDepth > 8) {
+                // For high bit-depth formats
+                switch (outputBitDepth) {
+                    case 10:
+                        shaderStr << "    outputBufferY.data[outYIndex] = pack10BitTo16Bit(YCbCrRawIn.x);\n\n";
+                        break;
+                    case 12:
+                        shaderStr << "    outputBufferY.data[outYIndex] = pack12BitTo16Bit(YCbCrRawIn.x);\n\n";
+                        break;
+                    case 16:
+                    default:
+                        // 16-bit (and any other depth above 8): clamp and store the value directly
+                        shaderStr << "    outputBufferY.data[outYIndex] = uint16_t(clamp(YCbCrRawIn.x, 0.0, 65535.0));\n\n";
+                        break;
+                }
+            } else {
+                // For 8-bit formats
+                shaderStr << "    outputBufferY.data[outYIndex] = uint8_t(clamp(YCbCrRawIn.x, 0.0, 255.0));\n\n";
+            }
+        }
+
+        // Write Cb/Cr components based on plane format and aspect flags
+        if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) {
+            shaderStr << "    // Process chroma data conditionally\n"
+                      << "    if (processChroma) {\n";
+
+            if (isOutputTwoPlane) {
+                // Two-plane output buffer format with interleaved CbCr
+                shaderStr << "        // Write interleaved CbCr to 2-plane output buffer\n"
+                          << "        uint outCbCrIndex = pushConstants.outCbOffset + chromaPos.y * pushConstants.outCbPitch + chromaPos.x * 2;\n";
+
+                // Interleaved stores are emitted only when PLANE_1_BIT is set
+                if (outputBitDepth > 8) {
+                    // For high bit-depth formats with interleaved data
+                    switch (outputBitDepth) {
+                        case 10:
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCbCr.data[outCbCrIndex] = pack10BitTo16Bit(YCbCrRawIn.y);\n"
+                                          << "        outputBufferCbCr.data[outCbCrIndex + 1] = pack10BitTo16Bit(YCbCrRawIn.z);\n";
+                            }
+                            break;
+                        case 12:
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCbCr.data[outCbCrIndex] = pack12BitTo16Bit(YCbCrRawIn.y);\n"
+                                          << "        outputBufferCbCr.data[outCbCrIndex + 1] = pack12BitTo16Bit(YCbCrRawIn.z);\n";
+                            }
+                            break;
+                        case 16:
+                        default:
+                            // 16-bit (and any other depth above 8): clamp and store the values directly
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCbCr.data[outCbCrIndex] = uint16_t(clamp(YCbCrRawIn.y, 0.0, 65535.0));\n"
+                                          << "        outputBufferCbCr.data[outCbCrIndex + 1] = uint16_t(clamp(YCbCrRawIn.z, 0.0, 65535.0));\n";
+                            }
+                            break;
+                    }
+                } else {
+                    // For 8-bit formats
+                    if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                        shaderStr << "        outputBufferCbCr.data[outCbCrIndex] = uint8_t(clamp(YCbCrRawIn.y, 0.0, 255.0));\n"
+                                  << "        outputBufferCbCr.data[outCbCrIndex + 1] = uint8_t(clamp(YCbCrRawIn.z, 0.0, 255.0));\n";
+                    }
+                }
+            } else {
+                // Three-plane output buffer format with separate Cb and Cr planes
+                shaderStr << "        // Write separate Cb and Cr to 3-plane output buffer\n";
+
+                // Emit an index per requested chroma plane (Cb for PLANE_1_BIT, Cr for PLANE_2_BIT)
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    shaderStr << "        uint outCbIndex = pushConstants.outCbOffset + chromaPos.y * pushConstants.outCbPitch + chromaPos.x;\n";
+                }
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                    shaderStr << "        uint outCrIndex = pushConstants.outCrOffset + chromaPos.y * pushConstants.outCrPitch + chromaPos.x;\n";
+                }
+
+                if (outputBitDepth > 8) {
+                    // For high bit-depth formats
+                    switch (outputBitDepth) {
+                        case 10:
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCb.data[outCbIndex] = pack10BitTo16Bit(YCbCrRawIn.y);\n";
+                            }
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                                shaderStr << "        outputBufferCr.data[outCrIndex] = pack10BitTo16Bit(YCbCrRawIn.z);\n";
+                            }
+                            break;
+                        case 12:
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCb.data[outCbIndex] = pack12BitTo16Bit(YCbCrRawIn.y);\n";
+                            }
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                                shaderStr << "        outputBufferCr.data[outCrIndex] = pack12BitTo16Bit(YCbCrRawIn.z);\n";
+                            }
+                            break;
+                        case 16:
+                        default:
+                            // 16-bit (and any other depth above 8): clamp and store the values directly
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                                shaderStr << "        outputBufferCb.data[outCbIndex] = uint16_t(clamp(YCbCrRawIn.y, 0.0, 65535.0));\n";
+                            }
+                            if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                                shaderStr << "        outputBufferCr.data[outCrIndex] = uint16_t(clamp(YCbCrRawIn.z, 0.0, 65535.0));\n";
+                            }
+                            break;
+                    }
+                } else {
+                    // For 8-bit formats
+                    if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                        shaderStr << "        outputBufferCb.data[outCbIndex] = uint8_t(clamp(YCbCrRawIn.y, 0.0, 255.0));\n";
+                    }
+                    if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                        shaderStr << "        outputBufferCr.data[outCrIndex] = uint8_t(clamp(YCbCrRawIn.z, 0.0, 255.0));\n";
+                    }
+                }
+            }
+
+            shaderStr << "    }\n"; // Close conditional chroma processing
+        }
+    } else {
+        // Writing to image
+        shaderStr << "    // Writing to image destination\n";
+
+        // Write Y component if PLANE_0_BIT is set
+        if (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) {
+            shaderStr << "    // Write Y component to Y plane\n"
+                      << "    imageStore(outputImageY, ivec3(pos, dstLayer), vec4(YCbCrRawIn.x, 0, 0, 1));\n\n";
+        }
+
+        // Write Cb/Cr components if their aspect flags are set
+        if ((imageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0) {
+            // Add conditional check for chroma processing
+            shaderStr << "    // Process chroma data conditionally\n"
+                      << "    if (processChroma) {\n";
+
+            if (isOutputTwoPlane) {
+                // Two-plane output image format with interleaved CbCr
+                if ((imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) != 0) {
+                    // Plane 1 holds both Cb and Cr, so a single store covers them
+                    shaderStr << "        // Write interleaved CbCr to 2-plane output image\n"
+                              << "        imageStore(outputImageCbCr, ivec3(chromaPos, dstLayer), "
+                              << "vec4(YCbCrRawIn.y, YCbCrRawIn.z, 0, 1));\n";
+                }
+            } else {
+                // Three-plane output image format with separate Cb and Cr planes
+                shaderStr << "        // Write separate Cb and Cr to 3-plane output image\n";
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+                    shaderStr << "        imageStore(outputImageCb, ivec3(chromaPos, dstLayer), vec4(YCbCrRawIn.y, 0, 0, 1));\n";
+                }
+
+                if (imageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+                    shaderStr << "        imageStore(outputImageCr, ivec3(chromaPos, dstLayer), vec4(YCbCrRawIn.z, 0, 0, 1));\n";
+                }
+            }
+
+            // Close the conditional block
+            shaderStr << "    }\n";
+        }
+    }
+
+    // End the function
+    shaderStr << "}\n\n";
+}
+
+uint32_t VulkanFilterYuvCompute::ShaderGenerateImagePlaneDescriptors(std::stringstream& shaderStr,
+                                                                     VkImageAspectFlags& imageAspects,
+                                                                     const char *imageName,
+                                                                     VkFormat    imageFormat,
+                                                                     bool isInput,
+                                                                     uint32_t startBinding,
+                                                                     uint32_t set,
+                                                                     bool imageArray)
+{
+    shaderStr << " // The " << (isInput ? "input" : "output") << " image binding\n";
     // Image binding goes in this pattern:
     // offset 0: RGBA image
     // offset 1: multi-planar image plane Y
@@ -267,7 +1183,8 @@ void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& s
 
         } else if (inputMpInfo->planesLayout.numberOfExtraPlanes == 2) {
 
-            imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT;
+            imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT |
+                                                         VK_IMAGE_ASPECT_PLANE_2_BIT;
 
             GenImageIoBindingLayout(shaderStr, imageName, "Cb",
                                     vkFormatLookUp(inputMpInfo->vkPlaneFormat[1])->name,
@@ -290,10 +1207,631 @@ void VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& s
         GenImageIoBindingLayout(shaderStr, imageName, "RGB",
                                 vkFormatLookUp(imageFormat)->name,
                                 isInput,
-                                startBinding,
+                                startBinding++,
                                 set,
                                 imageArray);
     }
+
+    return startBinding;
+}
+
+uint32_t VulkanFilterYuvCompute::ShaderGenerateBufferPlaneDescriptors(std::stringstream& shaderStr,
+                                                                      VkImageAspectFlags& imageAspects,
+                                                                      const char *bufferName,
+                                                                      VkFormat    bufferFormat,
+                                                                      bool isInput,
+                                                                      uint32_t startBinding,
+                                                                      uint32_t set,
+                                                                      VkDescriptorType bufferType)
+{
+    // Calls GenBufferIoBindingLayout once per plane buffer of bufferFormat and reports the plane mask
+    // through imageAspects. Bindings relative to the incoming startBinding:
+    // +0 single RGB buffer, +1 Y plane, +2 CbCr (2-plane) or Cb (3-plane), +3 Cr (3-plane).
+    // Return value: multi-planar formats return the last binding used (pre-increment), whereas
+    // single-plane formats return startBinding + 1 (post-increment).
+    const VkMpFormatInfo* inputMpInfo = YcbcrVkFormatInfo(bufferFormat);
+
+    // Determine element size based on format
+    const char* elementType = "uint8_t";  // Default to 8-bit
+
+    shaderStr << " // The " << (isInput ? "input" : "output") << " buffer binding\n";
+    // Heuristic on the format name: any occurrence of 16, 10 or 12 selects uint16_t (the R16 test is subsumed by 16)
+    const VkFormatDesc* formatInfo = vkFormatLookUp(bufferFormat);
+    if (formatInfo && formatInfo->name) {
+        if (strstr(formatInfo->name, "16") != nullptr ||
+            strstr(formatInfo->name, "R16") != nullptr ||
+            strstr(formatInfo->name, "10") != nullptr ||
+            strstr(formatInfo->name, "12") != nullptr) {
+            elementType = "uint16_t";  // Use 16-bit for 10/12/16-bit formats
+        }
+    }
+
+    if (inputMpInfo) {
+        // Multi-planar: one buffer per plane; imageAspects is only assigned when numberOfExtraPlanes is 1 or 2
+
+        // Y plane buffer (plane 0), bound at startBinding + 1
+        GenBufferIoBindingLayout(shaderStr, bufferName, "Y",
+                                 elementType,
+                                 bufferType,
+                                 isInput,
+                                 ++startBinding,
+                                 set);
+
+        if (inputMpInfo->planesLayout.numberOfExtraPlanes == 1) {
+            // 2-plane format (NV12, NV21, etc.)
+            imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT;
+
+            GenBufferIoBindingLayout(shaderStr, bufferName, "CbCr",
+                                     elementType,
+                                     bufferType,
+                                     isInput,
+                                     ++startBinding,
+                                     set);
+
+        } else if (inputMpInfo->planesLayout.numberOfExtraPlanes == 2) {
+            // 3-plane format (YUV 4:2:0, 4:2:2, 4:4:4, etc.)
+            imageAspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT |
+                                                         VK_IMAGE_ASPECT_PLANE_2_BIT;
+
+            GenBufferIoBindingLayout(shaderStr, bufferName, "Cb",
+                                     elementType,
+                                     bufferType,
+                                     isInput,
+                                     ++startBinding,
+                                     set);
+
+            GenBufferIoBindingLayout(shaderStr, bufferName, "Cr",
+                                     elementType,
+                                     bufferType,
+                                     isInput,
+                                     ++startBinding,
+                                     set);
+        }
+    } else {
+        // Single-plane formats (like RGBA): one buffer at startBinding itself
+        imageAspects = VK_IMAGE_ASPECT_COLOR_BIT;
+
+        GenBufferIoBindingLayout(shaderStr, bufferName, "RGB",
+                                 elementType,
+                                 bufferType,
+                                 isInput,
+                                 startBinding++,
+                                 set);
+    }
+
+    return startBinding;
+}
+
+
+uint32_t VulkanFilterYuvCompute::ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr,
+                                                                bool isInput,
+                                                                uint32_t startBinding,
+                                                                uint32_t set,
+                                                                bool imageArray,
+                                                                VkDescriptorType bufferType)
+{
+    // Dispatch on whether the selected side (input or output) is a buffer or an image; returns the helper's result
+    if ((isInput && m_inputIsBuffer) || (!isInput && m_outputIsBuffer)) {
+        // Buffer side: imageArray is not forwarded
+        return ShaderGenerateBufferPlaneDescriptors(shaderStr,
+                                             isInput ? m_inputImageAspects : m_outputImageAspects,
+                                             isInput ? "inputBuffer" : "outputBuffer",
+                                             isInput ? m_inputFormat : m_outputFormat,
+                                             isInput, // isInput
+                                             startBinding,    // startBinding
+                                             set,             // set
+                                             bufferType);
+    } else {
+        // Image side: bufferType is not forwarded
+        return ShaderGenerateImagePlaneDescriptors(shaderStr,
+                                            isInput ? m_inputImageAspects : m_outputImageAspects,
+                                            isInput ? "inputImage" : "outputImage",
+                                            isInput ? m_inputFormat : m_outputFormat,
+                                            isInput,       // isInput
+                                            startBinding,  // startBinding
+                                            set,           // set
+                                            imageArray  // imageArray
+                                            );
+    }
+}
+
+/**
+ * @brief Generates GLSL functions for YCbCr normalization with different bit depths
+ *
+ * Creates helper functions to normalize YCbCr values, handling different bit depths,
+ * and applying proper range adjustments (limited/full range).
+ *
+ * Process steps (C++ comment numbering; the emitted GLSL comments number their steps separately):
+ * 1. Calculate normalization parameters based on bit depth and range
+ * 2. Generate Y normalization function (scaling + offset)
+ * 3. Generate CbCr shifting functions (centering around zero)
+ * 4. Generate CbCr normalization functions (scaling + offset)
+ * 5. Generate bit-depth specific helpers for 10/12-bit formats (emitted regardless of hasChroma)
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param bitDepth The bit depth of the YCbCr data (8, 10, 12, or 16)
+ * @param isLimitedRange Whether values are limited range (true) or full range (false)
+ * @param hasChroma Whether to emit shiftCbCr()/normalizeCbCr(). NOTE(review): normalize10BitYUV()/normalize12BitYUV() still call normalizeCbCr() when false
+ */
+static void GenYCbCrNormalizationFuncs(std::stringstream& shaderStr,
+                                       uint32_t bitDepth = 8,
+                                       bool isLimitedRange = true,
+                                       bool hasChroma = true)
+{
+    // STEP 1: Calculate normalization parameters based on bit depth and range
+    // ===========================================================================
+
+    // Use double precision for calculations to maintain precision
+    double maxValue = (1ULL << bitDepth) - 1.0;  // Max value for the given bit depth
+
+    // Range endpoints; every branch below assigns all four before they are read
+    double yBlack, yWhite, cZero, cScale;
+
+    if (isLimitedRange) {
+        // Step 1.1: Calculate limited range (aka TV/Video range) values
+        // Use standard-compliant values for different bit depths
+        switch (bitDepth) {
+            case 10:
+                // 10-bit limited range: Y[64,940], C[64,960]
+                yBlack = 64.0;
+                yWhite = 940.0;
+                cZero = 64.0;
+                cScale = 896.0;  // 960 - 64
+                break;
+            case 12:
+                // 12-bit limited range: Y[256,3760], C[256,3840]
+                yBlack = 256.0;
+                yWhite = 3760.0;
+                cZero = 256.0;
+                cScale = 3584.0;  // 3840 - 256
+                break;
+            case 16:
+                // 16-bit limited range: scale 8-bit values by 2^8
+                yBlack = 16.0 * 256.0;
+                yWhite = 235.0 * 256.0;
+                cZero = 16.0 * 256.0;
+                cScale = 224.0 * 256.0;
+                break;
+            case 8:
+            default:
+                // 8-bit limited range: Y[16,235], C[16,240]; also the fallback for unlisted bit depths
+                yBlack = 16.0;
+                yWhite = 235.0;
+                cZero = 16.0;
+                cScale = 224.0;
+                break;
+        }
+    } else {
+        // Step 1.2: Calculate full range values (same for all bit depths, just scaled)
+        yBlack = 0.0;
+        yWhite = maxValue;
+        cZero = 0.0;
+        cScale = maxValue;
+    }
+
+    // Step 1.3: Calculate normalization factors with double precision
+    double yRange = yWhite - yBlack;
+    double yFactor = 1.0 / yRange;
+    double yOffset = -yBlack * yFactor;
+    double cFactor = 1.0 / cScale;
+
+    // Scratch stream used to render the limited-range constants in fixed notation
+    std::stringstream ss;
+    ss.precision(16); // Use high precision for constants
+
+    // STEP 2: Emit the precision qualifiers, then the Y normalization function
+    // ===========================================================================
+    shaderStr << "\n"
+              << "// Specify high precision for all floating point calculations\n"
+              << "precision highp float;\n"
+              << "precision highp int;\n"
+              << "\n"
+              << "// STEP 1: Normalize Y component for " << bitDepth << "-bit "
+              << (isLimitedRange ? "limited range" : "full range") << " content\n"
+              << "highp float normalizeY(highp float Y) {\n";
+
+    if (isLimitedRange) {
+        // Step 2.1: Limited range needs black level adjustment and scaling
+        // Format with high precision
+        ss.str("");
+        ss << std::fixed << yFactor;
+        std::string yFactorStr = ss.str();
+
+        ss.str("");
+        ss << std::fixed << yOffset;
+        std::string yOffsetStr = ss.str();
+
+        shaderStr << "    // Step 1.1: Map from [" << yBlack << ", " << yWhite << "] to [0.0, 1.0]\n"
+                  << "    // Formula: normalizedY = (Y - yBlack) / yRange = Y * yFactor + yOffset\n"
+                  << "    return Y * " << yFactorStr << " + " << yOffsetStr << ";\n";
+    } else {
+        // Step 2.2: Full range just needs scaling (maxValue uses shaderStr's own number formatting, not ss)
+        shaderStr << "    // Step 1.1: Map from [0, " << maxValue << "] to [0.0, 1.0]\n"
+                  << "    // Formula: normalizedY = Y / maxValue\n"
+                  << "    return Y / " << maxValue << ";\n";
+    }
+    shaderStr << "}\n\n";
+
+    if (hasChroma) {
+        // STEP 3: Generate CbCr shifting functions
+        // ===========================================================================
+
+        // Step 3.1: Generate CbCr shifting function for vec2 (common for 2-plane formats)
+        shaderStr << "// STEP 2: Shift CbCr components from centered range to [-0.5, 0.5] range\n"
+                  << "highp vec2 shiftCbCr(highp vec2 CbCr) {\n"
+                  << "    // Step 2.1: Shift from [0.0, 1.0] to [-0.5, 0.5]\n"
+                  << "    return CbCr - 0.5;\n"
+                  << "}\n\n";
+
+        // Step 3.2: Generate CbCr shifting function for vec3 (for full YCbCr triplet)
+        shaderStr << "// Step 2 (alternative): Shift YCbCr components, leaving Y alone but centering CbCr\n"
+                  << "highp vec3 shiftCbCr(highp vec3 ycbcr) {\n"
+                  << "    // Step 2.1: Shift only Cb and Cr from [0.0, 1.0] to [-0.5, 0.5]\n"
+                  << "    const highp vec3 shift = vec3(0.0, -0.5, -0.5);\n"
+                  << "    return ycbcr + shift;\n"
+                  << "}\n\n";
+
+        // STEP 4: Generate CbCr normalization function
+        // ===========================================================================
+        shaderStr << "// STEP 3: Normalize CbCr components for " << bitDepth << "-bit "
+                  << (isLimitedRange ? "limited range" : "full range") << " content\n"
+                  << "highp vec2 normalizeCbCr(highp vec2 CbCr) {\n";
+
+        if (isLimitedRange) {
+            // Step 4.1: Limited range needs zero level adjustment and scaling
+            // Format with high precision
+            ss.str("");
+            ss << std::fixed << cZero;
+            std::string cZeroStr = ss.str();
+
+            ss.str("");
+            ss << std::fixed << cFactor;
+            std::string cFactorStr = ss.str();
+
+            shaderStr << "    // Step 3.1: Map from [" << cZero << ", " << (cZero + cScale) << "] to [0.0, 1.0]\n"
+                      << "    // Formula: normalizedCbCr = (CbCr - cZero) / cScale\n"
+                      << "    return (CbCr - " << cZeroStr << ") * " << cFactorStr << ";\n";
+        } else {
+            // Step 4.2: Full range just needs scaling (maxValue uses shaderStr's own number formatting, not ss)
+            shaderStr << "    // Step 3.1: Map from [0, " << maxValue << "] to [0.0, 1.0]\n"
+                      << "    // Formula: normalizedCbCr = CbCr / maxValue\n"
+                      << "    return CbCr / " << maxValue << ";\n";
+        }
+        shaderStr << "}\n\n";
+    }
+
+    // STEP 5: 10/12-bit helpers. NOTE(review): not guarded by hasChroma although normalize*BitYUV() calls normalizeCbCr()
+    // ===========================================================================
+    if (bitDepth == 10) {
+        shaderStr << "// STEP 4: Special 10-bit format handling functions\n"
+                  << "// 10-bit packing formats often store values in uint16 or uint32 with specific bit layouts\n"
+                  << "\n"
+                  << "// Extract 10-bit value from 16-bit storage (common for P010, P210, etc.)\n"
+                  << "highp float extract10BitFrom16Bit(highp uint value) {\n"
+                  << "    // Most 10-bit formats store the value in the most significant 10 bits\n"
+                  << "    highp uint raw10bit = value >> 6; // Shift right to remove 6 padding bits\n"
+                  << "    return float(raw10bit);\n"
+                  << "}\n\n"
+
+                  << "// Extract 10-bit value from 16-bit storage as normalized float\n"
+                  << "highp float extract10BitNormalized(highp uint value) {\n"
+                  << "    highp uint raw10bit = value >> 6; // Shift right to remove 6 padding bits\n"
+                  << "    return float(raw10bit) / 1023.0; // Normalize to [0,1]\n"
+                  << "}\n\n"
+
+                  << "// Normalize packed 10-bit YUV directly\n"
+                  << "highp vec3 normalize10BitYUV(highp uvec3 packedYuv) {\n"
+                  << "    // Extract 10-bit components\n"
+                  << "    highp float y = extract10BitFrom16Bit(packedYuv.x);\n"
+                  << "    highp float cb = extract10BitFrom16Bit(packedYuv.y);\n"
+                  << "    highp float cr = extract10BitFrom16Bit(packedYuv.z);\n"
+                  << "    // Normalize components\n"
+                  << "    y = normalizeY(y);\n"
+                  << "    highp vec2 cbcr = normalizeCbCr(vec2(cb, cr));\n"
+                  << "    return vec3(y, cbcr);\n"
+                  << "}\n\n";
+    } else if (bitDepth == 12) {
+        shaderStr << "// STEP 4: Special 12-bit format handling functions\n"
+                  << "// 12-bit packing formats often store values in uint16 or uint32 with specific bit layouts\n"
+                  << "\n"
+                  << "// Extract 12-bit value from 16-bit storage (common for P012, P212, etc.)\n"
+                  << "highp float extract12BitFrom16Bit(highp uint value) {\n"
+                  << "    // Most 12-bit formats store the value in the most significant 12 bits\n"
+                  << "    highp uint raw12bit = value >> 4; // Shift right to remove 4 padding bits\n"
+                  << "    return float(raw12bit);\n"
+                  << "}\n\n"
+
+                  << "// Extract 12-bit value from 16-bit storage as normalized float\n"
+                  << "highp float extract12BitNormalized(highp uint value) {\n"
+                  << "    highp uint raw12bit = value >> 4; // Shift right to remove 4 padding bits\n"
+                  << "    return float(raw12bit) / 4095.0; // Normalize to [0,1]\n"
+                  << "}\n\n"
+
+                  << "// Normalize packed 12-bit YUV directly\n"
+                  << "highp vec3 normalize12BitYUV(highp uvec3 packedYuv) {\n"
+                  << "    // Extract 12-bit components\n"
+                  << "    highp float y = extract12BitFrom16Bit(packedYuv.x);\n"
+                  << "    highp float cb = extract12BitFrom16Bit(packedYuv.y);\n"
+                  << "    highp float cr = extract12BitFrom16Bit(packedYuv.z);\n"
+                  << "    // Normalize components\n"
+                  << "    y = normalizeY(y);\n"
+                  << "    highp vec2 cbcr = normalizeCbCr(vec2(cb, cr));\n"
+                  << "    return vec3(y, cbcr);\n"
+                  << "}\n\n";
+    }
+}
+
+/**
+ * @brief Generates GLSL functions for YCbCr denormalization with different bit depths
+ *
+ * Creates helper functions to denormalize YCbCr values from normalized [0-1] for Y and
+ * [-0.5,0.5] for CbCr back to the appropriate bit depth and range (limited or full).
+ * This is the inverse operation of GenYCbCrNormalizationFuncs.
+ *
+ * Process steps:
+ * 1. Calculate denormalization parameters based on bit depth and range
+ * 2. Generate Y denormalization function (inverse scaling + offset)
+ * 3. Generate CbCr unshifting functions (recentering to [0,1])
+ * 4. Generate CbCr denormalization functions (inverse scaling + offset)
+ * 5. Generate combined convenience functions
+ * 6. Generate bit-depth specific packing helpers for 10/12-bit formats
+ *
+ * @param shaderStr Output stringstream where the GLSL code will be written
+ * @param bitDepth Target bit depth (8, 10, 12, or 16); other values use 8-bit limited-range levels
+ * @param isLimitedRange Whether target values are limited range (true) or full range (false)
+ * @param hasChroma Whether to emit the chroma helpers (also gates packYUVTo10Bit/packYUVTo12Bit)
+ */
+static void GenYCbCrDeNormalizationFuncs(std::stringstream& shaderStr,
+                                         uint32_t bitDepth = 8,
+                                         bool isLimitedRange = true,
+                                         bool hasChroma = true)
+{
+    // STEP 1: Calculate denormalization parameters based on bit depth and range
+    // ===========================================================================
+
+    // Use double precision for calculations to maintain precision
+    const double maxValue = (1ULL << bitDepth) - 1.0;  // Max value for the given bit depth
+
+    // Black/white levels for luma, zero level and scale for chroma
+    double yBlack, yWhite, cZero, cScale;
+
+    if (isLimitedRange) {
+        // Step 1.1: Calculate limited range (aka TV/Video range) values
+        // Use standard-compliant values for different bit depths
+        switch (bitDepth) {
+            case 10:
+                // 10-bit limited range: Y[64,940], C[64,960]
+                yBlack = 64.0;
+                yWhite = 940.0;
+                cZero = 64.0;
+                cScale = 896.0;  // 960 - 64
+                break;
+            case 12:
+                // 12-bit limited range: Y[256,3760], C[256,3840]
+                yBlack = 256.0;
+                yWhite = 3760.0;
+                cZero = 256.0;
+                cScale = 3584.0;  // 3840 - 256
+                break;
+            case 16:
+                // 16-bit limited range: scale 8-bit values by 2^8
+                yBlack = 16.0 * 256.0;
+                yWhite = 235.0 * 256.0;
+                cZero = 16.0 * 256.0;
+                cScale = 224.0 * 256.0;
+                break;
+            case 8:
+            default:
+                // 8-bit limited range: Y[16,235], C[16,240]
+                yBlack = 16.0;
+                yWhite = 235.0;
+                cZero = 16.0;
+                cScale = 224.0;
+                break;
+        }
+    } else {
+        // Step 1.2: Calculate full range values (same for all bit depths, just scaled)
+        yBlack = 0.0;
+        yWhite = maxValue;
+        cZero = 0.0;
+        cScale = maxValue;
+    }
+
+    // Step 1.3: Calculate denormalization factors (inverse of normalization)
+    const double yRange = yWhite - yBlack;
+
+    // Format values with high precision for GLSL
+    std::stringstream ss;
+    ss.precision(16); // Use high precision for constants
+    ss << std::fixed << maxValue;  // Full-range factor as a float literal, like the limited-range ones
+    const std::string maxValueStr = ss.str();
+
+    // STEP 2: Generate Y denormalization function
+    // ===========================================================================
+    shaderStr << "\n"
+              << "// Specify high precision for all floating point calculations\n"
+              << "precision highp float;\n"
+              << "precision highp int;\n"
+              << "\n"
+              << "// STEP 1: Denormalize Y component from [0.0, 1.0] back to " << bitDepth << "-bit "
+              << (isLimitedRange ? "limited range" : "full range") << " content\n"
+              << "highp float denormalizeY(highp float normalizedY) {\n";
+
+    if (isLimitedRange) {
+        // Step 2.1: Limited range needs scaling and black level adjustment
+        ss.str("");
+        ss << std::fixed << yRange;
+        std::string yRangeStr = ss.str();
+
+        ss.str("");
+        ss << std::fixed << yBlack;
+        std::string yBlackStr = ss.str();
+
+        shaderStr << "    // Step 1.1: Map from [0.0, 1.0] back to [" << yBlack << ", " << yWhite << "]\n"
+                  << "    // Formula: Y = normalizedY * yRange + yBlack\n"
+                  << "    return normalizedY * " << yRangeStr << " + " << yBlackStr << ";\n";
+    } else {
+        // Step 2.2: Full range just needs scaling
+        shaderStr << "    // Step 1.1: Map from [0.0, 1.0] back to [0, " << maxValue << "]\n"
+                  << "    // Formula: Y = normalizedY * maxValue\n"
+                  << "    return normalizedY * " << maxValueStr << ";\n";
+    }
+    shaderStr << "}\n\n";
+
+    if (hasChroma) {
+        // STEP 3: Generate CbCr unshifting function
+        // ===========================================================================
+        shaderStr << "// STEP 2: Unshift CbCr components from [-0.5, 0.5] range back to centered range [0.0, 1.0]\n"
+                  << "highp vec2 unshiftCbCr(highp vec2 shiftedCbCr) {\n"
+                  << "    // Step 2.1: Shift from [-0.5, 0.5] back to [0.0, 1.0]\n"
+                  << "    return shiftedCbCr + 0.5;\n"
+                  << "}\n\n";
+
+        // STEP 4: Generate CbCr denormalization function
+        // ===========================================================================
+        shaderStr << "// STEP 3: Denormalize CbCr components from [0.0, 1.0] back to " << bitDepth << "-bit "
+                  << (isLimitedRange ? "limited range" : "full range") << " content\n"
+                  << "highp vec2 denormalizeCbCr(highp vec2 normalizedCbCr) {\n";
+
+        if (isLimitedRange) {
+            // Step 4.1: Limited range needs scaling and zero level adjustment
+            ss.str("");
+            ss << std::fixed << cScale;
+            std::string cScaleStr = ss.str();
+
+            ss.str("");
+            ss << std::fixed << cZero;
+            std::string cZeroStr = ss.str();
+
+            shaderStr << "    // Step 3.1: Map from [0.0, 1.0] back to [" << cZero << ", " << (cZero + cScale) << "]\n"
+                      << "    // Formula: CbCr = normalizedCbCr * cScale + cZero\n"
+                      << "    return normalizedCbCr * " << cScaleStr << " + " << cZeroStr << ";\n";
+        } else {
+            // Step 4.2: Full range just needs scaling
+            shaderStr << "    // Step 3.1: Map from [0.0, 1.0] back to [0, " << maxValue << "]\n"
+                      << "    // Formula: CbCr = normalizedCbCr * maxValue\n"
+                      << "    return normalizedCbCr * " << maxValueStr << ";\n";
+        }
+        shaderStr << "}\n\n";
+
+        // STEP 5: Generate combined convenience functions
+        // ===========================================================================
+
+        // Step 5.1: Combined unshift and denormalize
+        shaderStr << "// STEP 4: Combined function: unshift and denormalize CbCr in one step\n"
+                  << "highp vec2 unshiftAndDenormalizeCbCr(highp vec2 shiftedCbCr) {\n"
+                  << "    // Step 4.1: First unshift from [-0.5, 0.5] to [0.0, 1.0], then denormalize\n"
+                  << "    return denormalizeCbCr(unshiftCbCr(shiftedCbCr));\n"
+                  << "}\n\n";
+
+        // Step 5.2: Full YCbCr denormalization
+        shaderStr << "// STEP 5: Combined function to denormalize full YCbCr triplet\n"
+                  << "highp vec3 denormalizeYCbCr(highp vec3 normalizedYCbCr) {\n"
+                  << "    // Step 5.1: Denormalize Y component\n"
+                  << "    highp float y = denormalizeY(normalizedYCbCr.x);\n"
+                  << "    // Step 5.2: Unshift and denormalize Cb and Cr components\n"
+                  << "    highp vec2 cbcr = denormalizeCbCr(vec2(normalizedYCbCr.y + 0.5, normalizedYCbCr.z + 0.5));\n"
+                  << "    // Step 5.3: Combine the components into a single vector\n"
+                  << "    return vec3(y, cbcr);\n"
+                  << "}\n\n";
+    }
+
+    // STEP 6: Generate bit-depth specific packing helpers for 10/12-bit formats
+    // ===========================================================================
+    if (bitDepth == 10) {
+        shaderStr << "// STEP 6: Special 10-bit format packing functions\n"
+                  << "// Pack 10-bit values into 16-bit storage (common for P010, P210, etc.)\n"
+                  << "\n"
+                  << "// Pack 10-bit value into 16-bit storage (MSB aligned with padding)\n"
+                  << "highp uint pack10BitTo16Bit(highp float value) {\n"
+                  << "    // Clamp the input value to the valid range for 10-bit\n"
+                  << "    highp uint raw10bit = uint(clamp(value, 0.0, 1023.0));\n"
+                  << "    // Shift left by 6 bits to store in MSB format (standard for P010, etc.)\n"
+                  << "    return raw10bit << 6;\n"
+                  << "}\n\n"
+                  << "// Pack normalized [0,1] value into 10-bit MSB aligned format\n"
+                  << "highp uint packNormalizedTo10Bit(highp float normalizedValue) {\n"
+                  << "    // Scale to 10-bit range and pack\n"
+                  << "    highp uint raw10bit = uint(clamp(normalizedValue * 1023.0, 0.0, 1023.0));\n"
+                  << "    return raw10bit << 6;\n"
+                  << "}\n\n";
+        if (hasChroma) {  // packYUVTo10Bit calls denormalizeYCbCr, which is only emitted with chroma
+            shaderStr << "// Pack denormalized YUV to 10-bit values\n"
+                      << "highp uvec3 packYUVTo10Bit(highp vec3 yuv) {\n"
+                      << "    // Denormalize components first\n"
+                      << "    highp vec3 denormYuv = denormalizeYCbCr(yuv);\n"
+                      << "    // Pack each component into 16-bit storage (MSB aligned)\n"
+                      << "    return uvec3(\n"
+                      << "        pack10BitTo16Bit(denormYuv.x),  // Y\n"
+                      << "        pack10BitTo16Bit(denormYuv.y),  // Cb\n"
+                      << "        pack10BitTo16Bit(denormYuv.z)   // Cr\n"
+                      << "    );\n"
+                      << "}\n\n";
+        }
+    } else if (bitDepth == 12) {
+        shaderStr << "// STEP 6: Special 12-bit format packing functions\n"
+                  << "// Pack 12-bit values into 16-bit storage (common for P012, P212, etc.)\n"
+                  << "\n"
+                  << "// Pack 12-bit value into 16-bit storage (MSB aligned with padding)\n"
+                  << "highp uint pack12BitTo16Bit(highp float value) {\n"
+                  << "    // Clamp the input value to the valid range for 12-bit\n"
+                  << "    highp uint raw12bit = uint(clamp(value, 0.0, 4095.0));\n"
+                  << "    // Shift left by 4 bits to store in MSB format (standard for P012, etc.)\n"
+                  << "    return raw12bit << 4;\n"
+                  << "}\n\n"
+                  << "// Pack normalized [0,1] value into 12-bit MSB aligned format\n"
+                  << "highp uint packNormalizedTo12Bit(highp float normalizedValue) {\n"
+                  << "    // Scale to 12-bit range and pack\n"
+                  << "    highp uint raw12bit = uint(clamp(normalizedValue * 4095.0, 0.0, 4095.0));\n"
+                  << "    return raw12bit << 4;\n"
+                  << "}\n\n";
+        if (hasChroma) {  // packYUVTo12Bit calls denormalizeYCbCr, which is only emitted with chroma
+            shaderStr << "// Pack denormalized YUV to 12-bit values\n"
+                      << "highp uvec3 packYUVTo12Bit(highp vec3 yuv) {\n"
+                      << "    // Denormalize components first\n"
+                      << "    highp vec3 denormYuv = denormalizeYCbCr(yuv);\n"
+                      << "    // Pack each component into 16-bit storage (MSB aligned)\n"
+                      << "    return uvec3(\n"
+                      << "        pack12BitTo16Bit(denormYuv.x),  // Y\n"
+                      << "        pack12BitTo16Bit(denormYuv.y),  // Cb\n"
+                      << "        pack12BitTo16Bit(denormYuv.z)   // Cr\n"
+                      << "    );\n"
+                      << "}\n\n";
+        }
+    }
+}
+
+/**
+ * @brief Emits the GLSL helper convertYCbCrFormat() used for YCbCr-to-YCbCr conversion
+ *
+ * The emitted function maps a raw YCbCr triplet to normalized values through
+ * normalizeY()/normalizeCbCr() and back to raw output values through
+ * denormalizeY()/denormalizeCbCr(); those four GLSL helpers are not emitted here.
+ *
+ * @param shaderStr Output stringstream that receives the GLSL code
+ * @param inputBitDepth Bit depth of the input YCbCr data (not read by this generator)
+ * @param outputBitDepth Bit depth of the output YCbCr data (not read by this generator)
+ * @param isInputLimitedRange Limited (true) or full (false) input range (not read by this generator)
+ * @param isOutputLimitedRange Limited (true) or full (false) output range (not read by this generator)
+ */
+static void GenConvertYCbCrFormat(std::stringstream& shaderStr,
+                                  uint32_t inputBitDepth = 8,
+                                  uint32_t outputBitDepth = 8,
+                                  bool isInputLimitedRange = true,
+                                  bool isOutputLimitedRange = true)
+{
+    // The emitted text is constant; bit depth and range are handled by the helpers it calls.
+    shaderStr << "// Function to handle YCbCr format conversion with proper normalization\n"
+              << "vec3 convertYCbCrFormat(vec3 YCbCrRawIn) {\n"
+              << "    // Step 1: Normalize input YCbCr values to [0-1] range\n"
+              << "    float normalizedY = normalizeY(YCbCrRawIn.x);\n"
+              << "    vec2 normalizedCbCr = normalizeCbCr(vec2(YCbCrRawIn.y, YCbCrRawIn.z));\n\n"
+              << "    // Step 2: Denormalize to output bit depth and range\n"
+              << "    float y = denormalizeY(normalizedY);\n"
+              << "    vec2 cbcr = denormalizeCbCr(normalizedCbCr);\n\n"
+              << "    // Return the converted values\n"
+              << "    return vec3(y, cbcr.x, cbcr.y);\n"
+              << "}\n\n";
 }
 
 size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader)
@@ -307,56 +1845,45 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader)
 
     // Create compute pipeline
     std::stringstream shaderStr;
+
+    // 1. Generate header and push constants
     GenHeaderAndPushConst(shaderStr);
+
+    // 2. Generate IO bindings
     // Input image
-    shaderStr << " // The input YCbCr image binding\n";
+    shaderStr << " // The input YCbCr input binding\n";
+    // Input Descriptors
     ShaderGeneratePlaneDescriptors(shaderStr,
-                                   m_inputImageAspects,
-                                   "inputImage",
-                                   m_inputFormat,
                                    true, // isInput
                                    0,    // startBinding
                                    0,    // set
-                                   true  // imageArray
-                                   );
-
-        // Output image
-        shaderStr << " // The output RGBA image binding\n";
-        ShaderGeneratePlaneDescriptors(shaderStr,
-                                       m_outputImageAspects,
-                                       "outputImage",
-                                       m_outputFormat,
-                                       false, // isInput
-                                       4,     // startBinding
-                                       0,     // set
-                                       true   // imageArray
-                                       );
-
-        shaderStr << "\n"
-                     " // TODO: normalize only narrow\n"
-                     "float normalizeY(float Y) {\n"
-                      "    // return (Y - (16.0 / 255.0)) * (255.0 / (235.0 - 16.0));\n"
-                      "    return (Y - 0.0627451) * 1.164383562;\n"
-                     "}\n"
-                     "\n"
-                     "vec2 shiftCbCr(vec2 CbCr) {\n"
-                     "    return CbCr - 0.5;\n"
-                     "}\n"
-                     "\n"
-                     "vec3 shiftCbCr(vec3 ycbcr) {\n"
-                     "    const vec3 shiftCbCr  = vec3(0.0, -0.5, -0.5);\n"
-                     "    return ycbcr + shiftCbCr;\n"
-                     "}\n"
-                     "\n"
-                     " // TODO: normalize only narrow\n"
-                     "vec2 normalizeCbCr(vec2 CbCr) {\n"
-                     "    // return (CbCr - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0);\n"
-                     "    return (CbCr - 0.0627451) * 1.138392857;\n"
-                     "}\n"
-                     "\n";
-
-    const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo = m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo();
-    const VkMpFormatInfo * mpInfo = YcbcrVkFormatInfo(samplerYcbcrConversionCreateInfo.format);
+                                   true,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+    shaderStr << " // The output RGBA image binding\n";
+    // Output Descriptors
+    ShaderGeneratePlaneDescriptors(shaderStr,
+                                   false, // isInput
+                                   4,     // startBinding
+                                   0,     // set
+                                   true,  // imageArray
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+    // Get format information to determine bit depth
+    const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo =
+        m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo();
+    const VkMpFormatInfo* mpInfo = YcbcrVkFormatInfo(samplerYcbcrConversionCreateInfo.format);
+
+    // Determine bit depth from the format
+    uint32_t bitDepth = mpInfo ? GetBitsPerChannel(mpInfo->planesLayout) : 8;
+
+    // Determine if we're using limited or full range
+    bool isLimitedRange = (samplerYcbcrConversionCreateInfo.ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW);
+
+    // 3. Generate helper functions for YCbCr normalization with proper bit depth handling
+    GenYCbCrNormalizationFuncs(shaderStr, bitDepth, isLimitedRange, true);
+
+    // 4. Generate YCbCr to RGB conversion function
     const unsigned int bpp = (8 + mpInfo->planesLayout.bpp * 2);
 
     const YcbcrBtStandard btStandard = GetYcbcrPrimariesConstantsId(samplerYcbcrConversionCreateInfo.ycbcrModel);
@@ -367,7 +1894,6 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader)
                                     rangeConstants.cbMax,
                                     rangeConstants.crMax);
 
-
     shaderStr <<
         "vec3 convertYCbCrToRgb(vec3 yuv) {\n"
         "    vec3 rgb;\n";
@@ -377,7 +1903,7 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader)
         "}\n"
         "\n";
 
-
+    // 5. Generate color range normalization function
     YcbcrNormalizeColorRange yCbCrNormalizeColorRange(bpp,
             (samplerYcbcrConversionCreateInfo.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) ?
                     YCBCR_COLOR_RANGE_NATURAL : (YCBCR_COLOR_RANGE)samplerYcbcrConversionCreateInfo.ycbcrRange);
@@ -390,21 +1916,51 @@ size_t VulkanFilterYuvCompute::InitYCBCR2RGBA(std::string& computeShader)
         "}\n"
         "\n";
 
+    // 6. Generate function to fetch YCbCr components from images
+    shaderStr <<
+        "vec3 fetchYCbCrFromImage(ivec3 pos) {\n"
+        "    // Fetch from the texture.\n"
+        "    float Y = imageLoad(inputImageY, pos).r;\n"
+        "    // For subsampled formats, divide by 2\n"
+        "    vec2 CbCr = imageLoad(inputImageCbCr, ivec3(pos.xy/2, pos.z)).rg;\n"
+        "    return vec3(Y, CbCr);\n"
+        "}\n"
+        "\n";
+
+    // 7. Generate function to write RGBA to output image
+    shaderStr <<
+        "void writeRgbaToImage(vec4 rgba, ivec3 pos) {\n"
+        "    imageStore(outputImageRGB, pos, rgba);\n"
+        "}\n"
+        "\n";
+
+    // 8. Main function
     shaderStr <<
         "void main()\n"
         "{\n";
+
+    // 9. Handle position calculation
     GenHandleImagePosition(shaderStr);
+
+    // 10. Calculate source position with replication if enabled
     GenHandleSourcePositionWithReplicate(shaderStr, m_enableRowAndColumnReplication);
+
+    // 11. YCbCr to RGB conversion
     shaderStr <<
-        "    // Fetch from the texture.\n"
-        "    float Y = imageLoad(inputImageY, ivec3(srcPos, pushConstants.srcImageLayer)).r;\n"
-        "    // TODO: it is /2 only for sub-sampled formats\n"
-        "    vec2 CbCr = imageLoad(inputImageCbCr, ivec3(srcPos/2, pushConstants.srcImageLayer)).rg;\n"
+        "    // Calculate position with layer\n"
+        "    ivec3 srcPos3D = ivec3(srcPos, pushConstants.srcLayer);\n"
+        "    ivec3 dstPos3D = ivec3(pos, pushConstants.dstLayer);\n"
+        "\n"
+        "    // Fetch YCbCr components\n"
+        "    vec3 ycbcr = fetchYCbCrFromImage(srcPos3D);\n"
+        "\n"
+        "    // Process: normalize, shift, and convert to RGB\n"
+        "    ycbcr = shiftCbCr(normalizeYCbCr(ycbcr));\n"
+        "    vec3 rgb = convertYCbCrToRgb(ycbcr);\n"
         "\n"
-        "    vec3 ycbcr = shiftCbCr(normalizeYCbCr(vec3(Y, CbCr)));\n"
-        "    vec4 rgba = vec4(convertYCbCrToRgb(ycbcr),1.0);\n"
-        "    // Store it back.\n"
-        "    imageStore(outputImageRGB, ivec3(pos, pushConstants.dstImageLayer), rgba);\n"
+        "    // Write final RGBA result\n"
+        "    vec4 rgba = vec4(rgb, 1.0);\n"
+        "    writeRgbaToImage(rgba, dstPos3D);\n"
         "}\n";
 
     computeShader = shaderStr.str();
@@ -429,51 +1985,171 @@ size_t VulkanFilterYuvCompute::InitYCBCRCOPY(std::string& computeShader)
     // 3-planar: Cb (R) binding = 6
     // 3-planar: Cr (R) binding = 7
 
+    // Get format information to determine bit depths
+    const VkMpFormatInfo* inputMpInfo = YcbcrVkFormatInfo(m_inputFormat);
+    const VkMpFormatInfo* outputMpInfo = YcbcrVkFormatInfo(m_outputFormat);
+
+    // Determine bit depth from the formats
+    const uint32_t inputBitDepth = inputMpInfo ? GetBitsPerChannel(inputMpInfo->planesLayout) : 8;
+    const uint32_t outputBitDepth = outputMpInfo ? GetBitsPerChannel(outputMpInfo->planesLayout) : 8;
+
+    // Determine if we're using limited or full range for input and output
+    // Default to limited range as it's more common for YCbCr content
+    const VkSamplerYcbcrConversionCreateInfo& samplerYcbcrConversionCreateInfo =
+        m_samplerYcbcrConversion.GetSamplerYcbcrConversionCreateInfo();
+    const bool isInputLimitedRange = (samplerYcbcrConversionCreateInfo.ycbcrRange == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW);
+    const bool isOutputLimitedRange = isInputLimitedRange; // Usually same as input, but could be configurable
+
+    // Check if input or output are buffers
+    const bool isInputBuffer = m_inputIsBuffer;
+    const bool isOutputBuffer = m_outputIsBuffer;
+
+    // Check if we need to do any bit depth conversion
+    const bool needsBitDepthConversion = (inputBitDepth != outputBitDepth);
+
+    // Check if we need to do any range conversion
+    const bool needsRangeConversion = (isInputLimitedRange != isOutputLimitedRange);
+
     std::stringstream shaderStr;
+
+    // 1. Generate header and push constants
     GenHeaderAndPushConst(shaderStr);
-    // Input image
-    shaderStr << " // The input image binding\n";
+
+    // 2. Generate IO bindings
+    // Input Descriptors
     ShaderGeneratePlaneDescriptors(shaderStr,
-                                   m_inputImageAspects,
-                                   "inputImage",
-                                   m_inputFormat,
                                    true, // isInput
                                    0,    // startBinding
                                    0,    // set
-                                   true  // imageArray
-                                   );
+                                   true,
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
-    // Output image
-    shaderStr << " // The output image binding\n";
+    // Output Descriptors
     ShaderGeneratePlaneDescriptors(shaderStr,
-                                   m_outputImageAspects,
-                                   "outputImage",
-                                   m_outputFormat,
                                    false, // isInput
                                    4,     // startBinding
                                    0,     // set
-                                   true   // imageArray
-                                   );
+                                   true,  // imageArray
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
     shaderStr << "\n\n";
 
+    // Determine input and output plane configurations
+    const bool hasInputChroma = (m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0;
+    const bool hasOutputChroma = (m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)) != 0;
+
+    // Determine if input is two-plane (e.g., NV12) or three-plane (e.g., I420)
+    const bool isInputTwoPlane = (m_inputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) &&
+                          !(m_inputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT);
+
+    // Determine if output is two-plane (e.g., NV12) or three-plane (e.g., I420)
+    const bool isOutputTwoPlane = (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) &&
+                           !(m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT);
+
+    // 3. Add any bit depth handling functions needed
+    if (isInputBuffer && inputBitDepth > 8) {
+        bool isMSB = true; // Default to MSB-aligned (most common case)
+        GenExtractHighBitDepthFunc(shaderStr, isMSB, inputBitDepth);
+    }
+
+    // 4. Add buffer read/write functions if needed
+    if (isInputBuffer) {
+        // Add fetch functions for Y and CbCr from buffer
+        GenFetchYFromBufferFunc(shaderStr, inputBitDepth > 8, inputBitDepth);
+        GenFetchCbCrFromBufferFunc(shaderStr, inputBitDepth > 8, inputBitDepth);
+    }
+
+    // 5. Add YCbCr normalization and denormalization functions for bit depth conversion
+    if (needsBitDepthConversion || needsRangeConversion) {
+        // Generate normalization functions for input format
+        GenYCbCrNormalizationFuncs(shaderStr, inputBitDepth, isInputLimitedRange, hasInputChroma);
+
+        // Generate denormalization functions for output format
+        GenYCbCrDeNormalizationFuncs(shaderStr, outputBitDepth, isOutputLimitedRange, hasOutputChroma);
+    }
+
+    // 6. Generate the read function for YCbCr data
+    GenReadYCbCrBuffer(shaderStr, isInputBuffer, inputBitDepth, isInputTwoPlane, m_inputEnableMsbToLsbShift, m_inputImageAspects);
+
+    // 7. Generate the write function for YCbCr data
+    GenWriteYCbCrBuffer(shaderStr, isOutputBuffer, outputBitDepth, isOutputTwoPlane, m_outputEnableLsbToMsbShift, m_outputImageAspects);
+
+    // 8. Helper function for combined normalization and denormalization
+    if (needsBitDepthConversion || needsRangeConversion) {
+        GenConvertYCbCrFormat(shaderStr, inputBitDepth, outputBitDepth, isInputLimitedRange, isOutputLimitedRange);
+    }
+
+    // 9. Main function
     shaderStr <<
         "void main()\n"
         "{\n";
-    GenHandleImagePosition(shaderStr);
+
+    // 10. Handle position calculation
+    if (isInputBuffer || isOutputBuffer) {
+        // Use buffer position calculation
+        GenHandleBufferPosition(shaderStr);
+    } else {
+        // Use image position calculation
+        GenHandleImagePosition(shaderStr);
+    }
+
+    // 11. Calculate source position with replication if enabled
     GenHandleSourcePositionWithReplicate(shaderStr, m_enableRowAndColumnReplication);
+
+    // 12. Handle YCbCr processing
+
+    // For inputs with chroma, we need to handle subsampling
+    // Get subsampling ratios for input format
+    const uint32_t chromaHorzRatio = (inputMpInfo != nullptr) ? (1 << inputMpInfo->planesLayout.secondaryPlaneSubsampledX) : 1;
+    const uint32_t chromaVertRatio = (inputMpInfo != nullptr) ? (1 << inputMpInfo->planesLayout.secondaryPlaneSubsampledY) : 1;
+
+    // Generate condition for chroma processing based on actual subsampling
     shaderStr <<
-        "    // Read Y value from source Y plane and write it to destination Y plane\n"
-        "    float Y = imageLoad(inputImageY, ivec3(srcPos, pushConstants.srcImageLayer)).r;\n"
-        "    imageStore(outputImageY, ivec3(pos, pushConstants.dstImageLayer), vec4(Y, 0, 0, 1));\n"
-        "\n"
-        "    // Do the same for the CbCr plane, but remember about the 4:2:0 subsampling\n"
-        "    if (srcPos % 2 == ivec2(0, 0)) {\n"
-        "        srcPos /= 2;\n"
-        "        pos /= 2;\n"
-        "        vec2 CbCr = imageLoad(inputImageCbCr, ivec3(srcPos, pushConstants.srcImageLayer)).rg;\n"
-        "        imageStore(outputImageCbCr, ivec3(pos, pushConstants.dstImageLayer), vec4(CbCr, 0, 1));\n"
-        "    }\n"
-        "}\n";
+        "    // Handle proper subsampling based on format (" <<
+        (chromaHorzRatio == 2 ? (chromaVertRatio == 2 ? "4:2:0" : "4:2:2") : "4:4:4") << ")\n";
+
+    // Generate the chroma position condition with a boolean variable
+    GenHandleChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, true, "pos", "processChroma");
+
+    // Initialize chroma positions with default values
+    shaderStr << "    // Initialize chroma positions\n"
+              << "    ivec2 chromaSrcPos = srcPos;\n"
+              << "    ivec2 chromaPos = pos;\n\n"
+              << "    // Check if we need to process chroma\n"
+              << "    if (processChroma) {\n";
+
+    // Generate chroma position calculations for source position
+    GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "srcPos", "chromaSrcPos", 8);
+
+    // Generate chroma position calculations for destination position
+    GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "pos", "chromaPos", 8);
+
+    shaderStr << "    }\n";
+
+    // Read YCbCr data using the helper function
+    shaderStr << "\n"
+              << "    // Read YCbCr data from source\n"
+              << "    vec3 YCbCrRawIn = readYCbCrFromSource(srcPos, chromaSrcPos, pushConstants.srcLayer, processChroma);\n\n";
+
+    // Process the data based on whether we need conversion
+    if (needsBitDepthConversion || needsRangeConversion) {
+        shaderStr <<
+                 "    // Need format conversion - normalize and denormalize\n"
+                 "    vec3 YCbCrRawOut = convertYCbCrFormat(YCbCrRawIn);\n\n";
+    } else {
+        shaderStr <<
+                 "    // No format conversion needed - direct copy\n"
+                 "    vec3 YCbCrRawOut = YCbCrRawIn;\n\n";
+    }
+
+    // Write the processed data using the helper function
+    shaderStr <<
+                 "    // Write processed data to destination\n"
+                 "    writeYCbCrToDestination(YCbCrRawOut, pos, chromaPos, pushConstants.dstLayer, processChroma);\n"
+                 "\n\n";
+
+    // Close the main function
+    shaderStr << "}\n";
 
     computeShader = shaderStr.str();
     if (dumpShaders)
@@ -495,37 +2171,849 @@ size_t VulkanFilterYuvCompute::InitYCBCRCLEAR(std::string& computeShader)
 
     // Create compute pipeline
     std::stringstream shaderStr;
+
+    // 1. Generate header and push constants
     GenHeaderAndPushConst(shaderStr);
 
-    // Output image
-    shaderStr << " // The output image binding\n";
+    // 2. Generate output image bindings
+    shaderStr << " // The output descriptors binding\n";
+    // Output Descriptors
     ShaderGeneratePlaneDescriptors(shaderStr,
-                                   m_outputImageAspects,
-                                   "outputImage",
-                                   m_outputFormat,
                                    false, // isInput
                                    4,     // startBinding
                                    0,     // set
-                                   true   // imageArray
-                                   );
+                                   true,  // imageArray
+                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
     shaderStr << "\n\n";
 
+    // Get format information to determine subsampling ratios
+    const VkMpFormatInfo* outputMpInfo = YcbcrVkFormatInfo(m_outputFormat);
+    // Get subsampling ratios for output format
+    const uint32_t chromaHorzRatio = (outputMpInfo != nullptr) ? (1 << outputMpInfo->planesLayout.secondaryPlaneSubsampledX) : 1;
+    const uint32_t chromaVertRatio = (outputMpInfo != nullptr) ? (1 << outputMpInfo->planesLayout.secondaryPlaneSubsampledY) : 1;
+
+
+    // 3. Main function
     shaderStr <<
         "void main()\n"
         "{\n";
+
+    // 4. Handle position calculation
     GenHandleImagePosition(shaderStr);
+
+    // 5. Clear operations for Y plane
     shaderStr <<
-        "    imageStore(outputImageY, ivec3(pos, pushConstants.dstImageLayer), vec4(0.5, 0, 0, 1));\n"
-        "\n"
-        "    // Do the same for the CbCr plane, but remember about the 4:2:0 subsampling\n"
-        "    if (pos % 2 == ivec2(0, 0)) {\n"
-        "        pos /= 2;\n"
-        "        imageStore(outputImageCbCr, ivec3(pos, pushConstants.dstImageLayer), vec4(0.5, 0.5, 0.0, 1.0));\n"
-        "    }\n"
-        "}\n";
+        "    // Clear Y plane with 50% intensity\n"
+        "    imageStore(outputImageY, ivec3(pos, pushConstants.dstLayer), vec4(0.5, 0, 0, 1));\n"
+        "\n";
+
+    // Handle CbCr plane clearing based on format's subsampling
+    shaderStr <<
+        "    // Clear CbCr plane with " <<
+        (chromaHorzRatio == 2 ? (chromaVertRatio == 2 ? "4:2:0" : "4:2:2") : "4:4:4") <<
+        " subsampling\n";
+
+    // Generate a boolean to track whether this position needs chroma clearing
+    GenHandleChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, true, "pos", "shouldClearChroma");
+
+    // Handle position for chroma planes
+    shaderStr << "    ivec2 chromaPos = pos;\n";
+    shaderStr << "    if (shouldClearChroma) {\n";
+
+    // Calculate chroma position if necessary
+    GenCalculateChromaPosition(shaderStr, chromaHorzRatio, chromaVertRatio, "pos", "chromaPos", 8);
+
+    // For 2-plane format, output CbCr together
+    if (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_1_BIT) {
+        shaderStr << "        // Clear CbCr plane with 50% intensity (middle range)\n"
+                  << "        imageStore(outputImageCbCr, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.5, 0.0, 1.0));\n";
+    }
+
+    // For 3-plane format, handle Cb and Cr separately
+    if (m_outputImageAspects & VK_IMAGE_ASPECT_PLANE_2_BIT) {
+        shaderStr << "        // Clear separate Cb and Cr planes with 50% intensity (middle range)\n"
+                  << "        imageStore(outputImageCb, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.0, 0.0, 1.0));\n"
+                  << "        imageStore(outputImageCr, ivec3(chromaPos, pushConstants.dstLayer), vec4(0.5, 0.0, 0.0, 1.0));\n";
+    }
+
+    shaderStr << "    }\n"
+              << "}\n";
 
     computeShader = shaderStr.str();
     if (dumpShaders)
         std::cout << "\nCompute Shader:\n" << computeShader;
     return computeShader.size();
 }
+
+uint32_t VulkanFilterYuvCompute::GetPlaneIndex(VkImageAspectFlagBits planeAspect) {
+
+    // Maps an image aspect bit to the plane slot it addresses:
+    //   VK_IMAGE_ASPECT_COLOR_BIT or VK_IMAGE_ASPECT_PLANE_0_BIT -> 0
+    //   VK_IMAGE_ASPECT_PLANE_1_BIT                              -> 1
+    //   anything else (expected: VK_IMAGE_ASPECT_PLANE_2_BIT)    -> 2
+
+    // The aspect must be one of the bits in validAspects.
+    assert(planeAspect & validAspects);
+
+    const bool isFirstPlane = (planeAspect & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT)) != 0;
+    if (isFirstPlane) {
+        return 0;
+    }
+
+    return (planeAspect & VK_IMAGE_ASPECT_PLANE_1_BIT) ? 1 : 2;
+}
+
+uint32_t VulkanFilterYuvCompute::UpdateBufferDescriptorSets(
+                                    const VkBuffer*            vkBuffers,                 // numVkBuffers handles
+                                    uint32_t                   numVkBuffers,              // must be non-zero
+                                    const VkSubresourceLayout* vkBufferSubresourceLayout, // indexed by plane number
+                                    uint32_t                   numPlanes,                 // upper bound for plane numbers
+                                    VkImageAspectFlags         validImageAspects,
+                                    uint32_t&                  descrIndex,                // in/out: next slot to fill
+                                    uint32_t&                  baseBinding,
+                                    VkDescriptorType           descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
+                                    VkDescriptorBufferInfo     bufferDescriptors[maxNumComputeDescr],
+                                    std::array<VkWriteDescriptorSet, maxNumComputeDescr>& writeDescriptorSets,
+                                    const uint32_t maxDescriptors)
+{
+    // Emits one buffer descriptor write per requested aspect bit; returns the advanced descrIndex.
+    assert(numVkBuffers > 0); // bufferIndex is clamped to numVkBuffers - 1 below
+    validImageAspects &= validAspects;
+    uint32_t curImageAspect = 0;
+    uint32_t bufferIndex = 0;
+    while(validImageAspects) {
+        if (validImageAspects & (VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect) ) {
+            uint32_t planeNum = GetPlaneIndex(static_cast<VkImageAspectFlagBits>(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect));
+            assert(planeNum < numPlanes);        // vkBufferSubresourceLayout[planeNum] is read below
+            assert(descrIndex < maxDescriptors); // check before the slot is written, not only after the loop
+            uint32_t dstBinding = baseBinding;
+            if (curImageAspect > 0) {
+                // Plane bindings follow the base binding: plane 0 -> +1, plane 1 -> +2, plane 2 -> +3
+                dstBinding += (1 + planeNum);
+            }
+
+            writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+            writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
+            writeDescriptorSets[descrIndex].dstBinding = dstBinding;
+            writeDescriptorSets[descrIndex].descriptorCount = 1;
+            writeDescriptorSets[descrIndex].descriptorType = descriptorType;
+            bufferDescriptors[descrIndex].buffer = vkBuffers[bufferIndex];
+            bufferDescriptors[descrIndex].offset = vkBufferSubresourceLayout[planeNum].offset;
+            bufferDescriptors[descrIndex].range =  vkBufferSubresourceLayout[planeNum].arrayPitch;
+            writeDescriptorSets[descrIndex].pBufferInfo = &bufferDescriptors[descrIndex];
+            descrIndex++;
+            validImageAspects &= ~(VK_IMAGE_ASPECT_COLOR_BIT << curImageAspect);
+            bufferIndex = std::min(numVkBuffers - 1, bufferIndex + 1); // stay on the last buffer once they run out
+        }
+
+        curImageAspect++;
+    }
+    assert(descrIndex <= maxDescriptors);
+    return descrIndex;
+}
+
+uint32_t VulkanFilterYuvCompute::UpdateImageDescriptorSets(
+                                    const VkImageResourceView* imageView,
+                                    VkImageAspectFlags         validImageAspects,
+                                    VkSampler                  convSampler,
+                                    VkImageLayout              imageLayout,
+                                    uint32_t&                  descrIndex,
+                                    uint32_t&                  baseBinding,
+                                    VkDescriptorType           descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
+                                    VkDescriptorImageInfo      imageDescriptors[maxNumComputeDescr],
+                                    std::array<VkWriteDescriptorSet, maxNumComputeDescr>& writeDescriptorSets,
+                                    const uint32_t maxDescriptors)
+{
+    // Appends one image descriptor write per requested aspect bit, starting at
+    // descrIndex, and returns the advanced descrIndex. Aspect bit 0 uses the whole
+    // image view at baseBinding (with convSampler, if any); the other aspects use
+    // their per-plane views at baseBinding + 1 + plane index.
+    [[maybe_unused]] const uint32_t numImagePlanes = imageView->GetNumberOfPlanes();
+    VkImageAspectFlags pendingAspects = validImageAspects & validAspects;
+
+    for (uint32_t aspectShift = 0; pendingAspects != 0; aspectShift++) {
+
+        const VkImageAspectFlags aspectBit = (VK_IMAGE_ASPECT_COLOR_BIT << aspectShift);
+        if ((pendingAspects & aspectBit) == 0) {
+            continue; // this aspect was not requested
+        }
+
+        const bool wholeImage = (aspectShift == 0);
+        const uint32_t planeIdx = GetPlaneIndex(static_cast<VkImageAspectFlagBits>(aspectBit));
+        assert(planeIdx < numImagePlanes);
+
+        VkDescriptorImageInfo& imageInfo = imageDescriptors[descrIndex];
+        imageInfo.sampler     = wholeImage ? convSampler : VK_NULL_HANDLE;
+        imageInfo.imageView   = wholeImage ? imageView->GetImageView() : imageView->GetPlaneImageView(planeIdx);
+        imageInfo.imageLayout = imageLayout;
+        assert(imageInfo.imageView);
+
+        VkWriteDescriptorSet& descrWrite = writeDescriptorSets[descrIndex];
+        descrWrite.sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        descrWrite.dstSet          = VK_NULL_HANDLE;
+        descrWrite.dstBinding      = wholeImage ? baseBinding : (baseBinding + 1 + planeIdx);
+        descrWrite.descriptorCount = 1;
+        descrWrite.descriptorType  = (imageInfo.sampler != VK_NULL_HANDLE) ? VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
+                                                                           : descriptorType;
+        descrWrite.pImageInfo      = &imageInfo;
+
+        descrIndex++;
+        pendingAspects &= ~aspectBit;
+    }
+
+    assert(descrIndex <= maxDescriptors);
+    return descrIndex;
+}
+
+// Image input -> Image output
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkImageResourceView* inImageView,
+                                                     const VkVideoPictureResourceInfoKHR * inImageResourceInfo,
+                                                     const VkImageResourceView* outImageView,
+                                                     const VkVideoPictureResourceInfoKHR * outImageResourceInfo,
+                                                     uint32_t bufferIdx)
+{
+    // Image -> image variant. Binds the compute pipeline, supplies the descriptors
+    // according to the descriptor set layout mode, pushes the constants and
+    // dispatches enough workgroups to cover the output coded extent.
+    assert(cmdBuf != VK_NULL_HANDLE);
+
+    // Two-component size as stored in the push constant block.
+    struct ivec2 {
+        uint32_t width;
+        uint32_t height;
+
+        ivec2() : width(0), height(0) {}
+        ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {}
+    };
+
+    // Push constant block; the offset/pitch members are left at zero by this overload.
+    struct ImagePushConstants {
+        uint32_t srcLayer;
+        uint32_t dstLayer;
+        ivec2    inputSize;
+        ivec2    outputSize;
+        uint32_t yOffset;   // Y plane offset
+        uint32_t cbOffset;  // Cb plane offset
+        uint32_t crOffset;  // Cr plane offset
+        uint32_t yPitch;    // Y plane pitch
+        uint32_t cbPitch;   // Cb plane pitch
+        uint32_t crPitch;   // Cr plane pitch
+    };
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    const VkDescriptorSetLayoutCreateFlags layoutMode =
+        m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+    const bool usePushDescriptors  = (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+    const bool useDescriptorBuffer = (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT);
+
+    // Push-descriptor and descriptor-buffer modes both need the descriptor writes built here.
+    if (usePushDescriptors || useDescriptorBuffer) {
+
+        VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr]{};
+        std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+        uint32_t set = 0;
+        uint32_t descrIndex = 0;
+        uint32_t inputBaseBinding = 0;
+        uint32_t outputBaseBinding = 4;
+
+        // Input bindings
+        // IN 0: RGBA color converted by an YCbCr sample
+        // IN 1: y plane - G -> R8
+        // IN 2: Cb or Cr or CbCr plane - BR -> R8B8
+        // IN 3: Cr or Cb plane - R -> R8
+        UpdateImageDescriptorSets(inImageView,
+                                  m_inputImageAspects,
+                                  m_samplerYcbcrConversion.GetSampler(),
+                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                  descrIndex,
+                                  inputBaseBinding,
+                                  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                  imageDescriptors,
+                                  writeDescriptorSets,
+                                  maxNumComputeDescr / 2 /* max descriptors */);
+
+        // Output bindings
+        // OUT 4: Out RGBA or single planar YCbCr image
+        // OUT 5: y plane - G -> R8
+        // OUT 6: Cb or Cr or CbCr plane - BR -> R8B8
+        // OUT 7: Cr or Cb plane - R -> R8
+        UpdateImageDescriptorSets(outImageView,
+                                  m_outputImageAspects,
+                                  VK_NULL_HANDLE,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  descrIndex,
+                                  outputBaseBinding,
+                                  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                  imageDescriptors,
+                                  writeDescriptorSets,
+                                  maxNumComputeDescr /* max descriptors */);
+
+        assert(descrIndex <= maxNumComputeDescr);
+        assert(descrIndex >= 2);
+
+        if (usePushDescriptors) {
+            m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                m_descriptorSetLayout.GetPipelineLayout(),
+                                                set, descrIndex, writeDescriptorSets.data());
+        } else {
+            // Descriptor-buffer mode: UpdateDescriptorBuffer() yields the address to
+            // bind; set 0 is then placed at offset 0 of that single binding.
+            const VkDeviceOrHostAddressConstKHR descriptorBufferAddress =
+                  m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx,
+                                                               set,
+                                                               descrIndex,
+                                                               writeDescriptorSets.data());
+
+            VkDescriptorBufferBindingInfoEXT descriptorBufferBinding{};
+            descriptorBufferBinding.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+            descriptorBufferBinding.pNext = nullptr;
+            descriptorBufferBinding.address = descriptorBufferAddress.deviceAddress;
+            descriptorBufferBinding.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+                                            VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
+            m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &descriptorBufferBinding);
+
+            // Image (set 0) uses binding index 0 at offset 0
+            uint32_t boundBufferIndex = 0;
+            VkDeviceSize descriptorSetOffset = 0;
+            m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                         m_descriptorSetLayout.GetPipelineLayout(),
+                                                         set, 1, &boundBufferIndex, &descriptorSetOffset);
+        }
+    } else {
+        // Any other layout mode: bind the descriptor set held by m_descriptorSetLayout.
+        m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                          m_descriptorSetLayout.GetPipelineLayout(),
+                                          0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0);
+    }
+
+    ImagePushConstants pushConstants{};
+    pushConstants.srcLayer   = inImageResourceInfo->baseArrayLayer;  // Set the source layer index
+    pushConstants.dstLayer   = outImageResourceInfo->baseArrayLayer; // Set the destination layer index
+    pushConstants.inputSize  = ivec2(inImageResourceInfo->codedExtent.width,
+                                     inImageResourceInfo->codedExtent.height);
+    pushConstants.outputSize = ivec2(outImageResourceInfo->codedExtent.width,
+                                     outImageResourceInfo->codedExtent.height);
+    // yOffset, cbOffset, crOffset, yPitch, cbPitch and crPitch stay 0:
+    // they are not used for image input.
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf,
+                                 m_descriptorSetLayout.GetPipelineLayout(),
+                                 VK_SHADER_STAGE_COMPUTE_BIT,
+                                 0,
+                                 sizeof(ImagePushConstants),
+                                 &pushConstants);
+
+    // Round the group counts up so the dispatch covers the whole output size.
+    const uint32_t groupCountX = (pushConstants.outputSize.width  + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t groupCountY = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, groupCountX, groupCountY, 1);
+
+    return VK_SUCCESS;
+}
+
+// Buffer input -> Image output
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkBuffer*            inBuffers,
+                                                     uint32_t                   numInBuffers,
+                                                     const VkFormat*            inBufferFormats,
+                                                     const VkSubresourceLayout* inBufferSubresourceLayouts,
+                                                     uint32_t                   inBufferNumPlanes,
+                                                     const VkImageResourceView* outImageView,
+                                                     const VkVideoPictureResourceInfoKHR* outImageResourceInfo,
+                                                     const VkBufferImageCopy* pBufferImageCopy,
+                                                     uint32_t bufferIdx)
+{
+    // Buffer -> image variant; pBufferImageCopy is required and supplies the source buffer geometry.
+    assert(cmdBuf != VK_NULL_HANDLE);
+    assert(m_inputIsBuffer  == true);
+    assert(m_outputIsBuffer == false);
+    assert(pBufferImageCopy);
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+        {
+            // Both arrays are indexed by the shared descrIndex, so each needs maxNumComputeDescr entries.
+            VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr]{};
+            VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr]{};
+            std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+            uint32_t set = 0;
+            uint32_t descrIndex = 0;
+            uint32_t dstBinding = 0;
+
+            // Buffer input handling
+            // IN 0: Single buffer YCbCr
+            // IN 1: Y plane buffer
+            // IN 2: Cb, Cr or CbCr plane buffer
+            // IN 3: Cr plane buffer
+            UpdateBufferDescriptorSets(inBuffers, numInBuffers,
+                                       inBufferSubresourceLayouts, inBufferNumPlanes,
+                                       m_inputImageAspects,
+                                       descrIndex, dstBinding,
+                                       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                       bufferDescriptors,
+                                       writeDescriptorSets,
+                                       maxNumComputeDescr / 2);
+
+            // Image output
+            dstBinding = 4;
+            // OUT 4: Out RGBA or single planar YCbCr image
+            // OUT 5: y plane - G -> R8
+            // OUT 6: Cb or Cr or CbCr plane - BR -> R8B8
+            // OUT 7: Cr or Cb plane - R -> R8
+            UpdateImageDescriptorSets(outImageView,
+                                      m_outputImageAspects,
+                                      VK_NULL_HANDLE,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      descrIndex,
+                                      dstBinding,
+                                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                      imageDescriptors,
+                                      writeDescriptorSets,
+                                      maxNumComputeDescr /* max descriptors */);
+
+            assert(descrIndex <= maxNumComputeDescr);
+            assert(descrIndex >= 2);
+
+            if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+                m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                    m_descriptorSetLayout.GetPipelineLayout(),
+                                                    set, descrIndex, writeDescriptorSets.data());
+            } else {
+                VkDeviceOrHostAddressConstKHR descriptorBufferDeviceAddress =
+                      m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx,
+                                                                   set,
+                                                                   descrIndex,
+                                                                   writeDescriptorSets.data());
+
+                // Descriptor buffer bindings
+                VkDescriptorBufferBindingInfoEXT bindingInfo{};
+                bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+                bindingInfo.pNext = nullptr;
+                bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress;
+                bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+                                    VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
+                m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo);
+
+                uint32_t bufferIndexImage = 0;
+                VkDeviceSize bufferOffset = 0;
+                m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                         m_descriptorSetLayout.GetPipelineLayout(),
+                                                         set, 1, &bufferIndexImage, &bufferOffset);
+            }
+        }
+        break;
+
+        default:
+        m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                          m_descriptorSetLayout.GetPipelineLayout(),
+                                          0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0);
+    }
+
+    struct ivec2 {
+        uint32_t width;
+        uint32_t height;
+
+        ivec2() : width(0), height(0) {}
+        ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {}
+    };
+
+    struct BufferToImagePushConstants {
+        uint32_t srcLayer;
+        uint32_t dstLayer;
+        ivec2    inputSize;
+        ivec2    outputSize;
+        uint32_t yOffset;   // Y plane offset
+        uint32_t cbOffset;  // Cb plane offset
+        uint32_t crOffset;  // Cr plane offset
+        uint32_t yPitch;    // Y plane pitch
+        uint32_t cbPitch;   // Cb plane pitch
+        uint32_t crPitch;   // Cr plane pitch
+    };
+
+    // Source geometry: explicit bufferRowLength/bufferImageHeight when non-zero, else the copy extent.
+    const uint32_t width = pBufferImageCopy->bufferRowLength > 0 ?
+                           pBufferImageCopy->bufferRowLength :
+                           pBufferImageCopy->imageExtent.width;
+    const uint32_t height = pBufferImageCopy->bufferImageHeight > 0 ?
+                            pBufferImageCopy->bufferImageHeight :
+                            pBufferImageCopy->imageExtent.height;
+    const uint32_t rowPitch = width;
+
+    VkExtent3D outputExtent = outImageView->GetImageResource()->GetImageCreateInfo().extent;
+
+    // Offsets/pitches assume a planar 4:2:0 layout; inBufferSubresourceLayouts is not consulted here.
+    const VkDeviceSize planeSize = static_cast<VkDeviceSize>(width) * height; // widen before multiplying
+    const VkDeviceSize yOffset = pBufferImageCopy->bufferOffset;
+    const VkDeviceSize cbOffset = yOffset + planeSize;
+    const VkDeviceSize crOffset = cbOffset + (planeSize / 4);
+    assert(crOffset <= UINT32_MAX); // the push constants carry 32-bit offsets
+
+    BufferToImagePushConstants pushConstants = {
+            pBufferImageCopy->imageSubresource.baseArrayLayer,
+            outImageResourceInfo->baseArrayLayer,
+            ivec2(width, height),
+            ivec2(outputExtent.width, outputExtent.height),
+            static_cast<uint32_t>(yOffset),
+            static_cast<uint32_t>(cbOffset),
+            static_cast<uint32_t>(crOffset),
+            rowPitch,
+            rowPitch / 2,  // For 4:2:0 format
+            rowPitch / 2   // For 4:2:0 format
+    };
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf,
+                                 m_descriptorSetLayout.GetPipelineLayout(),
+                                 VK_SHADER_STAGE_COMPUTE_BIT,
+                                 0,
+                                 sizeof(BufferToImagePushConstants),
+                                 &pushConstants);
+
+    const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
+
+    return VK_SUCCESS;
+}
+
+// Image input -> Buffer output
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkImageResourceView* inImageView,
+                                                     const VkVideoPictureResourceInfoKHR* inImageResourceInfo,
+                                                     const VkBuffer*            outBuffers,        // with size numOutBuffers
+                                                     uint32_t                   numOutBuffers,
+                                                     const VkFormat*            outBufferFormats,   // with size outBufferNumPlanes
+                                                     const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes
+                                                     uint32_t                   outBufferNumPlanes,
+                                                     const VkBufferImageCopy*   pBufferImageCopy,
+                                                     uint32_t bufferIdx)
+{
+    assert(cmdBuf != VK_NULL_HANDLE);
+    assert(m_inputIsBuffer  == false);
+    assert(m_outputIsBuffer == true);
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+        {
+            VkDescriptorImageInfo imageDescriptors[maxNumComputeDescr / 2]{};
+            VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr / 2]{};
+            std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+            uint32_t set = 0;
+            uint32_t descrIndex = 0;
+            uint32_t dstBinding = 0;
+
+            // IN 0: RGBA color converted by an YCbCr sample
+            // IN 1: y plane - G -> R8
+            // IN 2: Cb or Cr or CbCr plane - BR -> R8B8
+            // IN 3: Cr or Cb plane - R -> R8
+            UpdateImageDescriptorSets(inImageView,
+                                      m_inputImageAspects,
+                                      m_samplerYcbcrConversion.GetSampler(),
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      descrIndex,
+                                      dstBinding,
+                                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                                      imageDescriptors,
+                                      writeDescriptorSets,
+                                      maxNumComputeDescr / 2 /* max descriptors */);
+
+            // Output buffer handling
+            dstBinding = 4;
+            // OUT 0: Single buffer YCbCr
+            // OUT 1: Y plane buffer
+            // OUT 2: Cb, Cr or CbCr plane buffer
+            // OUT 3: Cr or Cb plane buffer
+            UpdateBufferDescriptorSets(outBuffers, numOutBuffers,
+                                       outBufferSubresourceLayouts, outBufferNumPlanes,
+                                       m_inputImageAspects,
+                                       descrIndex, dstBinding,
+                                       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                       bufferDescriptors,
+                                       writeDescriptorSets,
+                                       maxNumComputeDescr);
+
+            assert(descrIndex <= maxNumComputeDescr);
+            assert(descrIndex >= 2);
+
+            if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+                m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                   m_descriptorSetLayout.GetPipelineLayout(),
+                                                   set, descrIndex, writeDescriptorSets.data());
+            } else {
+                VkDeviceOrHostAddressConstKHR descriptorBufferDeviceAddress =
+                     m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx,
+                                                                 set,
+                                                                 descrIndex,
+                                                                 writeDescriptorSets.data());
+
+                // Descriptor buffer bindings
+                VkDescriptorBufferBindingInfoEXT bindingInfo{};
+                bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+                bindingInfo.pNext = nullptr;
+                bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress;
+                bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+                                   VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
+                m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo);
+
+                uint32_t bufferIndexImage = 0;
+                VkDeviceSize bufferOffset = 0;
+                m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                             m_descriptorSetLayout.GetPipelineLayout(),
+                                                             set, 1, &bufferIndexImage, &bufferOffset);
+            }
+        }
+        break;
+
+        default:
+        m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                          m_descriptorSetLayout.GetPipelineLayout(),
+                                          0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0);
+    }
+
+    struct ivec2 {
+        uint32_t width;
+        uint32_t height;
+
+        ivec2() : width(0), height(0) {}
+        ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {}
+    };
+
+    struct ImageToBufferPushConstants {
+        uint32_t srcLayer;
+        uint32_t dstLayer;
+        ivec2    inputSize;
+        ivec2    outputSize;
+        uint32_t yOffset;   // Y plane offset
+        uint32_t cbOffset;  // Cb plane offset
+        uint32_t crOffset;  // Cr plane offset
+        uint32_t yPitch;    // Y plane pitch
+        uint32_t cbPitch;   // Cb plane pitch
+        uint32_t crPitch;   // Cr plane pitch
+    };
+
+    uint32_t width, height;
+    uint32_t rowPitch;
+    VkExtent3D inputExtent = inImageView->GetImageResource()->GetImageCreateInfo().extent;
+
+    if (pBufferImageCopy) {
+        width = pBufferImageCopy->bufferRowLength > 0 ?
+                pBufferImageCopy->bufferRowLength :
+                pBufferImageCopy->imageExtent.width;
+        height = pBufferImageCopy->bufferImageHeight > 0 ?
+                pBufferImageCopy->bufferImageHeight :
+                pBufferImageCopy->imageExtent.height;
+        rowPitch = width;
+    } else {
+        width = inputExtent.width;
+        height = inputExtent.height;
+        rowPitch = width;
+    }
+
+    VkDeviceSize planeSize = width * height;
+    VkDeviceSize yOffset = pBufferImageCopy ? pBufferImageCopy->bufferOffset : 0;
+    VkDeviceSize cbOffset = yOffset + planeSize;
+    VkDeviceSize crOffset = cbOffset + (planeSize / 4);
+
+    ImageToBufferPushConstants pushConstants = {
+            inImageResourceInfo->baseArrayLayer,
+            0, // Destination layer (buffer has no layers)
+            ivec2(inputExtent.width, inputExtent.height),
+            ivec2(width, height),
+            static_cast<uint32_t>(yOffset),
+            static_cast<uint32_t>(cbOffset),
+            static_cast<uint32_t>(crOffset),
+            rowPitch,
+            rowPitch / 2,  // For 4:2:0 format
+            rowPitch / 2   // For 4:2:0 format
+    };
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf,
+                               m_descriptorSetLayout.GetPipelineLayout(),
+                               VK_SHADER_STAGE_COMPUTE_BIT,
+                               0,
+                               sizeof(ImageToBufferPushConstants),
+                               &pushConstants);
+
+    const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
+
+    return VK_SUCCESS;
+}
+
+// Buffer input -> Buffer output (all buffer case).
+// Binds the compute pipeline, publishes the storage-buffer descriptors (inputs from binding 0,
+// outputs from binding 4), pushes the plane layout constants and records a single dispatch
+// sized to cover outBufferExtent. Always returns VK_SUCCESS.
+// bufferIdx is only forwarded to UpdateDescriptorBuffer() in descriptor-buffer mode.
+// NOTE(review): offsets/pitches are derived from the extents as tightly packed 4:2:0 planes;
+// the format arrays are unused here and the VkSubresourceLayout arrays are only forwarded to
+// UpdateBufferDescriptorSets() - confirm this matches what the shader expects.
+VkResult VulkanFilterYuvCompute::RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                                     const VkBuffer*            inBuffers,
+                                                     uint32_t                   numInBuffers,
+                                                     const VkFormat*            inBufferFormats, // with size inBufferNumPlanes
+                                                     const VkSubresourceLayout* inBufferSubresourceLayouts,
+                                                     uint32_t                   numInPlanes,
+                                                     const VkExtent3D&          inBufferExtent,
+                                                     const VkBuffer*            outBuffers,
+                                                     uint32_t                   numOutBuffers,
+                                                     const VkFormat*            outBufferFormats,
+                                                     const VkSubresourceLayout* outBufferSubresourceLayouts,
+                                                     uint32_t                   numOutPlanes,
+                                                     const VkExtent3D&          outBufferExtent,
+                                                     uint32_t bufferIdx)
+{
+    assert(cmdBuf != VK_NULL_HANDLE);
+    assert(m_inputIsBuffer  == true);
+    assert(m_outputIsBuffer == true);
+
+    m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
+
+    VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
+
+    switch (layoutMode) {
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
+        case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
+        {
+            VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr]{};
+            std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
+
+            uint32_t set = 0;
+            uint32_t descrIndex = 0;
+            uint32_t dstBinding = 0;
+
+            // Input buffer handling
+            // IN 0: Single buffer YCbCr
+            // IN 1: Y plane buffer
+            // IN 2: Cb, Cr or CbCr plane buffer
+            // IN 3: Cr plane buffer
+            UpdateBufferDescriptorSets(inBuffers, numInBuffers,
+                                       inBufferSubresourceLayouts, numInPlanes,
+                                       m_inputImageAspects,
+                                       descrIndex, dstBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                       bufferDescriptors, writeDescriptorSets, maxNumComputeDescr / 2);
+
+            // Output buffer handling
+            dstBinding = 4;
+            // OUT 0: Single buffer YCbCr
+            // OUT 1: Y plane buffer
+            // OUT 2: Cb, Cr or CbCr plane buffer
+            // OUT 3: Cr or Cb plane buffer
+            // The output side is selected by the output aspect mask, not the input one.
+            UpdateBufferDescriptorSets(outBuffers, numOutBuffers,
+                                       outBufferSubresourceLayouts, numOutPlanes,
+                                       m_outputImageAspects,
+                                       descrIndex, dstBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                       bufferDescriptors, writeDescriptorSets, maxNumComputeDescr);
+
+            assert(descrIndex <= maxNumComputeDescr);
+            assert(descrIndex >= 2);
+
+            if (layoutMode == VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
+                m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                  m_descriptorSetLayout.GetPipelineLayout(),
+                                                  set, descrIndex, writeDescriptorSets.data());
+            } else {
+                VkDeviceOrHostAddressConstKHR descriptorBufferDeviceAddress =
+                      m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx, set, descrIndex, writeDescriptorSets.data());
+
+                // Descriptor buffer bindings
+                VkDescriptorBufferBindingInfoEXT bindingInfo{};
+                bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
+                bindingInfo.pNext = nullptr;
+                bindingInfo.address = descriptorBufferDeviceAddress.deviceAddress;
+                bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+                                    VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
+                m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo);
+
+                uint32_t bufferIndexImage = 0;
+                VkDeviceSize bufferOffset = 0;
+                m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                                           m_descriptorSetLayout.GetPipelineLayout(),
+                                                           set, 1, &bufferIndexImage, &bufferOffset);
+            }
+        }
+        break;
+
+        default:
+        m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                                        m_descriptorSetLayout.GetPipelineLayout(),
+                                        0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0);
+    }
+
+    struct ivec2 { // plain aggregate of two unsigned components; brace-initialised below
+        uint32_t width;
+        uint32_t height;
+    };
+
+    struct BufferToBufferPushConstants {
+        uint32_t srcLayer;    // src image layer to use
+        uint32_t dstLayer;    // dst image layer to use
+        ivec2    inputSize;   // input image or buffer extent
+        ivec2    outputSize;  // output image or buffer extent
+        uint32_t inYOffset;   // input  buffer Y plane offset
+        uint32_t inCbOffset;  // input  buffer Cb plane offset
+        uint32_t inCrOffset;  // input  buffer Cr plane offset
+        uint32_t inYPitch;    // input  buffer Y plane pitch
+        uint32_t inCbPitch;   // input  buffer Cb plane pitch
+        uint32_t inCrPitch;   // input  buffer Cr plane pitch
+        uint32_t outYOffset;   // output buffer Y plane offset
+        uint32_t outCbOffset;  // output buffer Cb plane offset
+        uint32_t outCrOffset;  // output buffer Cr plane offset
+        uint32_t outYPitch;    // output buffer Y plane pitch
+        uint32_t outCbPitch;   // output buffer Cb plane pitch
+        uint32_t outCrPitch;   // output buffer Cr plane pitch
+    };
+
+    // Plane layout per side: Y at 0, Cb right after Y, Cr a quarter plane later (widened before multiplying).
+    const VkDeviceSize inPlaneSize  = static_cast<VkDeviceSize>(inBufferExtent.width)  * inBufferExtent.height;
+    const VkDeviceSize outPlaneSize = static_cast<VkDeviceSize>(outBufferExtent.width) * outBufferExtent.height;
+    const VkDeviceSize inCrOffset   = inPlaneSize  + (inPlaneSize  / 4);
+    const VkDeviceSize outCrOffset  = outPlaneSize + (outPlaneSize / 4);
+    // The push constants are 32-bit wide: catch layouts whose largest offset would be truncated.
+    assert((inCrOffset <= UINT32_MAX) && (outCrOffset <= UINT32_MAX));
+
+    const BufferToBufferPushConstants pushConstants = {
+            0, // Source layer (buffer has no layers)
+            0, // Destination layer (buffer has no layers)
+            { inBufferExtent.width,  inBufferExtent.height  },
+            { outBufferExtent.width, outBufferExtent.height },
+            0,                                    // inYOffset
+            static_cast<uint32_t>(inPlaneSize),   // inCbOffset
+            static_cast<uint32_t>(inCrOffset),    // inCrOffset
+            inBufferExtent.width,                 // inYPitch
+            inBufferExtent.width / 2,             // inCbPitch, for 4:2:0 format
+            inBufferExtent.width / 2,             // inCrPitch, for 4:2:0 format
+            0,                                    // outYOffset (was left zeroed together with all out* fields)
+            static_cast<uint32_t>(outPlaneSize),  // outCbOffset
+            static_cast<uint32_t>(outCrOffset),   // outCrOffset
+            outBufferExtent.width,                // outYPitch
+            outBufferExtent.width / 2,            // outCbPitch, for 4:2:0 format
+            outBufferExtent.width / 2             // outCrPitch, for 4:2:0 format
+    };
+
+    m_vkDevCtx->CmdPushConstants(cmdBuf, m_descriptorSetLayout.GetPipelineLayout(),
+                               VK_SHADER_STAGE_COMPUTE_BIT, 0 /* offset */,
+                               sizeof(BufferToBufferPushConstants), &pushConstants);
+
+    const uint32_t workgroupWidth = (pushConstants.outputSize.width + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
+    const uint32_t workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
+    m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
+
+    return VK_SUCCESS;
+}
diff --git a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h
index ef8db51a..ab9a8845 100644
--- a/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h
+++ b/common/libs/VkCodecUtils/VulkanFilterYuvCompute.h
@@ -32,6 +32,15 @@ class VulkanFilterYuvCompute : public VulkanFilter
 public:
 
     enum FilterType { YCBCRCOPY, YCBCRCLEAR, YCBCR2RGBA, RGBA2YCBCR };
+    static constexpr uint32_t maxNumComputeDescr = 8; // capacity of the descriptor-info / write-descriptor arrays used by the helpers below
+    // Mask of the three multi-planar aspect bits (PLANE_0 | PLANE_1 | PLANE_2).
+    static constexpr VkImageAspectFlags validPlaneAspects = VK_IMAGE_ASPECT_PLANE_0_BIT |
+                                                            VK_IMAGE_ASPECT_PLANE_1_BIT |
+                                                            VK_IMAGE_ASPECT_PLANE_2_BIT;
+    // validPlaneAspects plus the COLOR aspect.
+    static constexpr VkImageAspectFlags validAspects = VK_IMAGE_ASPECT_COLOR_BIT | validPlaneAspects;
+    // NOTE(review): presumably maps a single plane aspect bit to its plane index; the definition is not in this header - confirm.
+    static uint32_t GetPlaneIndex(VkImageAspectFlagBits planeAspect);
 
     static VkResult Create(const VulkanDeviceContext* vkDevCtx,
                            uint32_t queueFamilyIndex,
@@ -40,6 +49,8 @@ class VulkanFilterYuvCompute : public VulkanFilter
                            uint32_t maxNumFrames,
                            VkFormat inputFormat,
                            VkFormat outputFormat,
+                           bool inputEnableMsbToLsbShift,
+                           bool outputEnableLsbToMsbShift,
                            const VkSamplerYcbcrConversionCreateInfo* pYcbcrConversionCreateInfo,
                            const YcbcrPrimariesConstants* pYcbcrPrimariesConstants,
                            const VkSamplerCreateInfo* pSamplerCreateInfo,
@@ -52,6 +63,8 @@ class VulkanFilterYuvCompute : public VulkanFilter
                            uint32_t maxNumFrames,
                            VkFormat inputFormat,
                            VkFormat outputFormat,
+                           bool inputEnableMsbToLsbShift,
+                           bool outputEnableLsbToMsbShift,
                            const YcbcrPrimariesConstants* pYcbcrPrimariesConstants)
         : VulkanFilter(vkDevCtx, queueFamilyIndex, queueIndex)
         , m_filterType(filterType)
@@ -71,7 +84,11 @@ class VulkanFilterYuvCompute : public VulkanFilter
                                 VK_IMAGE_ASPECT_PLANE_0_BIT |
                                 VK_IMAGE_ASPECT_PLANE_1_BIT |
                                 VK_IMAGE_ASPECT_PLANE_2_BIT)
+        , m_inputEnableMsbToLsbShift(inputEnableMsbToLsbShift)
+        , m_outputEnableLsbToMsbShift(outputEnableLsbToMsbShift)
         , m_enableRowAndColumnReplication(true)
+        , m_inputIsBuffer(false)
+        , m_outputIsBuffer(false)
     {
         // FIXME: m_ycbcrPrimariesConstants is currently unused but is kept for future use.
         (void)m_ycbcrPrimariesConstants;
@@ -116,263 +133,205 @@ class VulkanFilterYuvCompute : public VulkanFilter
         assert(m_vkDevCtx != nullptr);
     }
 
+    uint32_t UpdateBufferDescriptorSets(const VkBuffer*            vkBuffers, // declaration only; NOTE(review): presumably fills the two arrays below for these buffers - confirm in the .cpp
+                                        uint32_t                   numVkBuffers, // number of entries in vkBuffers
+                                        const VkSubresourceLayout* vkBufferSubresourceLayout, // NOTE(review): presumably numPlanes entries - confirm
+                                        uint32_t                   numPlanes,
+                                        VkImageAspectFlags         validImageAspects, // the buffer recorders pass the filter's aspect mask here
+                                        uint32_t&                  descrIndex,  // in/out cursor; the buffer -> buffer recorder asserts it ends <= maxNumComputeDescr
+                                        uint32_t&                  baseBinding, // in/out; the buffer -> buffer recorder seeds it with 0 (inputs) and 4 (outputs)
+                                        VkDescriptorType           descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
+                                        VkDescriptorBufferInfo bufferDescriptors[maxNumComputeDescr], // array parameter, i.e. a pointer to caller-owned storage
+                                        std::array<VkWriteDescriptorSet, maxNumComputeDescr>& writeDescriptorSets, // caller-owned, passed by reference
+                                        const uint32_t maxDescriptors = maxNumComputeDescr); // NOTE(review): looks like an upper bound for descrIndex; return value meaning not visible here
+    // Image counterpart of UpdateBufferDescriptorSets: same cursor / binding / array parameters, with an image view, sampler and layout instead of a buffer list.
+    uint32_t  UpdateImageDescriptorSets(const VkImageResourceView* inputImageView,
+                                        VkImageAspectFlags         validImageAspects,
+                                        VkSampler                  convSampler, // NOTE(review): presumably VK_NULL_HANDLE when no sampler is wanted - confirm
+                                        VkImageLayout              imageLayout,
+                                        uint32_t&                  descrIndex,  // in/out, passed by reference
+                                        uint32_t&                  baseBinding, // in/out, passed by reference
+                                        VkDescriptorType           descriptorType, // Ex: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
+                                        VkDescriptorImageInfo      imageDescriptors[maxNumComputeDescr], // array parameter, i.e. a pointer to caller-owned storage
+                                        std::array<VkWriteDescriptorSet, maxNumComputeDescr>& writeDescriptorSets, // caller-owned, passed by reference
+                                        const uint32_t maxDescriptors = maxNumComputeDescr); // NOTE(review): looks like an upper bound for descrIndex; return value meaning not visible here
+
+    // Image input -> Image output
     virtual VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf,
                                          const VkImageResourceView* inputImageView,
                                          const VkVideoPictureResourceInfoKHR * inputImageResourceInfo,
                                          const VkImageResourceView* outputImageView,
                                          const VkVideoPictureResourceInfoKHR * outputImageResourceInfo,
-                                         uint32_t bufferIdx)
-    {
-
-        assert(cmdBuf != VK_NULL_HANDLE);
-
-        m_vkDevCtx->CmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_computePipeline.getPipeline());
-
-        VkDescriptorSetLayoutCreateFlags layoutMode = m_descriptorSetLayout.GetDescriptorSetLayoutInfo().GetDescriptorLayoutMode();
-
-        switch (layoutMode) {
-            case VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR:
-            case VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT:
-            {
-
-                const uint32_t maxNumComputeDescr = 8;
-                VkDescriptorImageInfo imageDescriptors[8]{};
-                std::array<VkWriteDescriptorSet, maxNumComputeDescr> writeDescriptorSets{};
-
-                // Images
-                uint32_t set = 0;
-                uint32_t descrIndex = 0;
-                uint32_t dstBinding = 0;
-                // RGBA color converted by an YCbCr sample
-                if (m_inputImageAspects & VK_IMAGE_ASPECT_COLOR_BIT) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = (m_samplerYcbcrConversion.GetSampler() != VK_NULL_HANDLE) ?
-                                                                        VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER :
-                                                                        VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-
-                    imageDescriptors[descrIndex].sampler = m_samplerYcbcrConversion.GetSampler();
-                    imageDescriptors[descrIndex].imageView = inputImageView->GetImageView();
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // RGBA or Sampled YCbCr
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                uint32_t planeNum = 0;
-                // y plane - G -> R8
-                if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < inputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // Y (0) plane
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                // CbCr plane - BR -> R8B8
-                if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < inputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // CbCr (1) plane
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                // Cr plane - R -> R8
-                if ((m_inputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < inputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = inputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex]; // CbCr (1) plane
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                // Out RGBA or single planar YCbCr image
-                if (m_outputImageAspects & VK_IMAGE_ASPECT_COLOR_BIT) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = outputImageView->GetImageView();
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex];
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                planeNum = 0;
-                // y plane out - G -> R8
-                if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < outputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex];
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                // CbCr plane out - BR -> R8B8
-                if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < outputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex];
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                // Cr plane out - R -> R8
-                if ((m_outputImageAspects & (VK_IMAGE_ASPECT_PLANE_0_BIT << planeNum)) &&
-                        (planeNum < outputImageView->GetNumberOfPlanes())) {
-                    writeDescriptorSets[descrIndex].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
-                    writeDescriptorSets[descrIndex].dstSet = VK_NULL_HANDLE;
-                    writeDescriptorSets[descrIndex].dstBinding = dstBinding;
-                    writeDescriptorSets[descrIndex].descriptorCount = 1;
-                    writeDescriptorSets[descrIndex].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-                    imageDescriptors[descrIndex].sampler = VK_NULL_HANDLE;
-                    imageDescriptors[descrIndex].imageView = outputImageView->GetPlaneImageView(planeNum++);
-                    assert(imageDescriptors[descrIndex].imageView);
-                    imageDescriptors[descrIndex].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-                    writeDescriptorSets[descrIndex].pImageInfo = &imageDescriptors[descrIndex];
-                    descrIndex++;
-                }
-                dstBinding++;
-
-                assert(descrIndex <= maxNumComputeDescr);
-                assert(descrIndex >= 2);
-
-                if (layoutMode ==  VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) {
-                    m_vkDevCtx->CmdPushDescriptorSetKHR(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
-                                                        m_descriptorSetLayout.GetPipelineLayout(),
-                                                        set, descrIndex, writeDescriptorSets.data());
-                } else {
-
-                    VkDeviceOrHostAddressConstKHR imageDescriptorBufferDeviceAddress =
-                          m_descriptorSetLayout.UpdateDescriptorBuffer(bufferIdx,
-                                                                       set,
-                                                                       descrIndex,
-                                                                       writeDescriptorSets.data());
-
-
-                    // Descriptor buffer bindings
-                    // Set 0 = Image
-                    VkDescriptorBufferBindingInfoEXT bindingInfo{};
-                    bindingInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT;
-                    bindingInfo.pNext = nullptr;
-                    bindingInfo.address = imageDescriptorBufferDeviceAddress.deviceAddress;
-                    bindingInfo.usage = VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
-                                        VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT;
-                    m_vkDevCtx->CmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo);
-
-                    // Image (set 0)
-                    uint32_t bufferIndexImage = 0;
-                    VkDeviceSize bufferOffset = 0;
-                    m_vkDevCtx->CmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
-                                                               m_descriptorSetLayout.GetPipelineLayout(),
-                                                               set, 1, &bufferIndexImage, &bufferOffset);
-                }
-            }
-            break;
-
-            default:
-            m_vkDevCtx->CmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE,
-                                              m_descriptorSetLayout.GetPipelineLayout(),
-                                              0, 1, m_descriptorSetLayout.GetDescriptorSet(), 0, 0);
-        }
-
-        struct ivec2 {
-            uint32_t width;
-            uint32_t height;
-
-            ivec2() : width(0), height(0) {}
-            ivec2(int32_t width_, int32_t height_) : width(width_), height(height_) {}
-        };
-
-        struct PushConstants {
-            uint32_t srcLayer;
-            uint32_t dstLayer;
-            ivec2    inputSize;  // Original input image size (width, height)
-            ivec2    outputSize; // Output image size (width, height, with padding)
-        };
-
-        PushConstants pushConstants = {
-                inputImageResourceInfo->baseArrayLayer, // Set the source layer index
-                outputImageResourceInfo->baseArrayLayer, // Set the destination layer index
-                ivec2(inputImageResourceInfo->codedExtent.width, inputImageResourceInfo->codedExtent.height),
-                ivec2(outputImageResourceInfo->codedExtent.width, outputImageResourceInfo->codedExtent.height)
-        };
-
-        m_vkDevCtx->CmdPushConstants(cmdBuf,
-                                     m_descriptorSetLayout.GetPipelineLayout(),
-                                     VK_SHADER_STAGE_COMPUTE_BIT,
-                                     0, // offset
-                                     sizeof(PushConstants),
-                                     &pushConstants);
-
-        const uint32_t  workgroupWidth  = (pushConstants.outputSize.width  + (m_workgroupSizeX - 1)) / m_workgroupSizeX;
-        const uint32_t  workgroupHeight = (pushConstants.outputSize.height + (m_workgroupSizeY - 1)) / m_workgroupSizeY;
-        m_vkDevCtx->CmdDispatch(cmdBuf, workgroupWidth, workgroupHeight, 1);
-
-        return VK_SUCCESS;
-    }
+                                         uint32_t bufferIdx) override;
+    // Buffer input -> Image output (declaration only; the definition is not part of this header)
+    VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                const VkBuffer*            inBuffers,     // with size numInBuffers
+                                uint32_t                   numInBuffers,
+                                const VkFormat*            inBufferFormats, // with size inBufferNumPlanes
+                                const VkSubresourceLayout* inBufferSubresourceLayouts, // with size inBufferNumPlanes
+                                uint32_t                   inBufferNumPlanes,
+                                const VkImageResourceView* outImageView,
+                                const VkVideoPictureResourceInfoKHR* outImageResourceInfo,
+                                const VkBufferImageCopy*   pBufferImageCopy, // NOTE(review): may be optional, as in the image -> buffer overload - confirm
+                                uint32_t bufferIdx); // NOTE(review): presumably the descriptor buffer slot, as in the buffer -> buffer overload - confirm
+
+    // Image input -> Buffer output
+    VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                const VkImageResourceView* inImageView,
+                                const VkVideoPictureResourceInfoKHR* inImageResourceInfo,
+                                const VkBuffer*            outBuffers,        // with size numOutBuffers
+                                uint32_t                   numOutBuffers,
+                                const VkFormat*            outBufferFormats,  // with size outBufferNumPlanes
+                                const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes
+                                uint32_t                   outBufferNumPlanes,
+                                const VkBufferImageCopy*   pBufferImageCopy,
+                                uint32_t bufferIdx);
+
+    // Buffer input -> Buffer output
+    VkResult RecordCommandBuffer(VkCommandBuffer cmdBuf,
+                                 const VkBuffer*            inBuffers,       // with size numInBuffers
+                                 uint32_t                   numInBuffers,
+                                 const VkFormat*            inBufferFormats, // with size inBufferNumPlanes
+                                 const VkSubresourceLayout* inBufferSubresourceLayouts, // with size inBufferNumPlanes
+                                 uint32_t                   inBufferNumPlanes,
+                                 const VkExtent3D&          inBufferExtent,
+                                 const VkBuffer*            outBuffers,        // with size numOutBuffers
+                                 uint32_t                   numOutBuffers,
+                                 const VkFormat*            outBufferFormats,   // with size outBufferNumPlanes
+                                 const VkSubresourceLayout* outBufferSubresourceLayouts, // with size outBufferNumPlanes
+                                 uint32_t                   outBufferNumPlanes,
+                                 const VkExtent3D&          outBufferExtent,
+                                 uint32_t bufferIdx);
 
 private:
     VkResult InitDescriptorSetLayout(uint32_t maxNumFrames);
-    void ShaderGeneratePlaneDescriptors(std::stringstream& computeShader,
-                                          VkImageAspectFlags& imageAspects,
-                                          const char *imageName,
-                                          VkFormat    imageFormat,
-                                          bool isInput,
-                                          uint32_t startBinding = 0,
-                                          uint32_t set = 0,
-                                          bool imageArray = true);
+
+    /**
+     * @brief Generates GLSL image descriptor bindings for shader input/output
+     *
+     * Creates appropriate GLSL image binding declarations based on the input/output format.
+     * Handles different YUV formats like single-plane (RGBA), 2-plane (NV12/NV21), and 3-plane (I420, etc).
+     *
+     * @param computeShader Output stringstream for shader code
+     * @param imageAspects Output parameter to store the image aspect flags used
+     * @param imageName Base image variable name
+     * @param imageFormat Vulkan format of the image
+     * @param isInput Whether this is an input or output resource
+     * @param startBinding Starting binding number in the descriptor set
+     * @param set Descriptor set number
+     * @param imageArray Whether to use image2DArray or image2D
+     * @return The next available binding number after all descriptors are created
+     */
+    uint32_t ShaderGenerateImagePlaneDescriptors(std::stringstream& computeShader,
+                                                 VkImageAspectFlags& imageAspects,
+                                                 const char *imageName,
+                                                 VkFormat    imageFormat,
+                                                 bool isInput,
+                                                 uint32_t startBinding = 0,
+                                                 uint32_t set = 0,
+                                                 bool imageArray = true);
+
+    /**
+     * @brief Generates GLSL buffer descriptor bindings for shader input/output
+     *
+     * Creates appropriate GLSL buffer binding declarations based on the input/output format.
+     * Handles different YUV buffer layouts matching single-plane, 2-plane, or 3-plane formats.
+     *
+     * @param shaderStr Output stringstream for shader code
+     * @param imageAspects Output parameter to store the image aspect flags used
+     * @param bufferName Base buffer variable name
+     * @param bufferFormat Vulkan format of the buffer data
+     * @param isInput Whether this is an input or output resource
+     * @param startBinding Starting binding number in the descriptor set
+     * @param set Descriptor set number
+     * @param bufferType The Vulkan descriptor type to use for the buffer
+     * @return The next available binding number after all descriptors are created
+     */
+    uint32_t ShaderGenerateBufferPlaneDescriptors(std::stringstream& shaderStr,
+                                                  VkImageAspectFlags& imageAspects,
+                                                  const char *bufferName,
+                                                  VkFormat    bufferFormat,
+                                                  bool isInput,
+                                                  uint32_t startBinding = 0,
+                                                  uint32_t set = 0,
+                                                  VkDescriptorType bufferType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+    /**
+     * @brief Unified descriptor generation for either buffer or image resources
+     *
+     * Delegates to either ShaderGenerateImagePlaneDescriptors or ShaderGenerateBufferPlaneDescriptors
+     * based on the resource type (image or buffer) needed for input/output.
+     *
+     * @param shaderStr Output stringstream for shader code
+     * @param isInput Whether this is an input or output resource
+     * @param startBinding Starting binding number in the descriptor set
+     * @param set Descriptor set number
+     * @param imageArray Whether to use image2DArray or image2D (for image resources)
+     * @param bufferType The Vulkan descriptor type to use for buffer resources
+     * @return The next available binding number after all descriptors are created
+     */
+    uint32_t ShaderGeneratePlaneDescriptors(std::stringstream& shaderStr,
+                                            bool isInput,
+                                            uint32_t startBinding,
+                                            uint32_t set,
+                                            bool imageArray,
+                                            VkDescriptorType bufferType);
+
+    /**
+     * @brief Initializes GLSL shader for YCbCr copy operation
+     *
+     * Generates a compute shader that copies YCbCr data from input to output
+     * without any color space conversion, preserving the format.
+     *
+     * @param computeShader Output string for the complete GLSL shader code
+     * @return Size of the generated shader code in bytes
+     */
     size_t InitYCBCRCOPY(std::string& computeShader);
+
+    /**
+     * @brief Initializes GLSL shader for YCbCr clear operation
+     *
+     * Generates a compute shader that clears/fills YCbCr data in the output
+     * resource with constant values.
+     *
+     * @param computeShader Output string for the complete GLSL shader code
+     * @return Size of the generated shader code in bytes
+     */
     size_t InitYCBCRCLEAR(std::string& computeShader);
+
+    /**
+     * @brief Initializes GLSL shader for YCbCr to RGBA conversion
+     *
+     * Generates a compute shader that converts YCbCr input to RGBA output
+     * using the appropriate color space conversion matrix.
+     *
+     * @param computeShader Output string for the complete GLSL shader code
+     * @return Size of the generated shader code in bytes
+     */
     size_t InitYCBCR2RGBA(std::string& computeShader);
 
+    /**
+     * @brief Initializes GLSL shader for RGBA to YCbCr conversion
+     *
+     * Generates a compute shader that converts RGBA input to YCbCr output
+     * using the appropriate color space conversion matrix.
+     *
+     * @param computeShader Output string for the complete GLSL shader code
+     * @return Size of the generated shader code in bytes
+     */
+    size_t InitRGBA2YCBCR(std::string& computeShader);
+
+    /**
+     * @brief Initializes GLSL shader for YUV to NV12 conversion using buffer input
+     *
+     * Generates a compute shader that converts YUV input from buffer to NV12 output,
+     * handling different YUV formats (I420, I422, I444) with appropriate chroma subsampling.
+     *
+     * @param computeShader Output string for the complete GLSL shader code
+     * @return Size of the generated shader code in bytes
+     */
+    size_t InitYUV2NV12FromBuffer(std::string& computeShader);
+
 private:
     const FilterType                         m_filterType;
     VkFormat                                 m_inputFormat;
@@ -386,8 +345,32 @@ class VulkanFilterYuvCompute : public VulkanFilter
     VulkanComputePipeline                    m_computePipeline;
     VkImageAspectFlags                       m_inputImageAspects;
     VkImageAspectFlags                       m_outputImageAspects;
+    uint32_t                                 m_inputEnableMsbToLsbShift : 1;
+    uint32_t                                 m_outputEnableLsbToMsbShift : 1;
     uint32_t                                 m_enableRowAndColumnReplication : 1;
-
+    uint32_t                                 m_inputIsBuffer : 1;
+    uint32_t                                 m_outputIsBuffer : 1;
+
+    struct PushConstants {
+        uint32_t srcLayer;        // src image layer to use
+        uint32_t dstLayer;        // dst image layer to use
+        uint32_t inputWidth;      // input image or buffer width
+        uint32_t inputHeight;     // input image or buffer height
+        uint32_t outputWidth;     // output image or buffer width
+        uint32_t outputHeight;    // output image or buffer height
+        uint32_t inYOffset;       // input buffer Y plane offset
+        uint32_t inCbOffset;      // input buffer Cb plane offset
+        uint32_t inCrOffset;      // input buffer Cr plane offset
+        uint32_t inYPitch;        // input buffer Y plane pitch
+        uint32_t inCbPitch;       // input buffer Cb plane pitch
+        uint32_t inCrPitch;       // input buffer Cr plane pitch
+        uint32_t outYOffset;      // output buffer Y plane offset
+        uint32_t outCbOffset;     // output buffer Cb plane offset
+        uint32_t outCrOffset;     // output buffer Cr plane offset
+        uint32_t outYPitch;       // output buffer Y plane pitch
+        uint32_t outCbPitch;      // output buffer Cb plane pitch
+        uint32_t outCrPitch;      // output buffer Cr plane pitch
+    };
 };
 
 #endif /* _VULKANFILTERYUVCOMPUTE_H_ */
diff --git a/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp b/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp
index 20fc073e..89215a8b 100644
--- a/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp
+++ b/common/libs/VkCodecUtils/VulkanShaderCompiler.cpp
@@ -103,13 +103,19 @@ VkShaderModule VulkanShaderCompiler::BuildShaderFromFile(const char *fileName,
                                                          VkShaderStageFlagBits type,
                                                          const VulkanDeviceContext* vkDevCtx)
 {
+#ifdef seekg
     // read file from the path
     std::ifstream is(fileName, std::ios::binary | std::ios::in | std::ios::ate);
 
     if (is.is_open()) {
-
-        size_t size = is.tellg();
-        is.seekg(0, std::ios::beg);
+        is.seekg (0, is.end);
+        std::streamoff fileSize = is.tellg(); // negative when tellg() fails
+        if (fileSize < 0 || static_cast<unsigned long long>(fileSize) > std::numeric_limits<size_t>::max()) { // compare before narrowing to size_t
+            std::cerr << "File size is too large or invalid" << std::endl;
+            return VK_NULL_HANDLE;
+        }
+        size_t size = static_cast<size_t>(fileSize);
+        is.seekg(0, is.beg);
         char* shaderCode = new char[size];
         is.read(shaderCode, size);
         is.close();
@@ -122,6 +128,7 @@ VkShaderModule VulkanShaderCompiler::BuildShaderFromFile(const char *fileName,
 
         return shaderModule;
     }
+#endif
 
     return VK_NULL_HANDLE;
 }
diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
index ebe00067..d6e1fd18 100644
--- a/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
+++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.cpp
@@ -115,11 +115,13 @@ VkResult VulkanVideoProcessor::Initialize(const VulkanDeviceContext* vkDevCtx,
         return result;
     }
 
-    VkVideoCoreProfile videoProfile(m_videoStreamDemuxer->GetVideoCodec(),
-                                    m_videoStreamDemuxer->GetChromaSubsampling(),
-                                    m_videoStreamDemuxer->GetLumaBitDepth(),
-                                    m_videoStreamDemuxer->GetChromaBitDepth(),
-                                    m_videoStreamDemuxer->GetProfileIdc());
+    VkVideoCoreProfile videoProfile ({
+        m_videoStreamDemuxer->GetVideoCodec(),
+        m_videoStreamDemuxer->GetChromaSubsampling(),
+        m_videoStreamDemuxer->GetLumaBitDepth(),
+        m_videoStreamDemuxer->GetChromaBitDepth(),
+        m_videoStreamDemuxer->GetProfileIdc()
+    });
 
     if (!VulkanVideoCapabilities::IsCodecTypeSupported(vkDevCtx,
                                                        vkDevCtx->GetVideoDecodeQueueFamilyIdx(),
@@ -194,12 +196,11 @@ VkResult VulkanVideoProcessor::Create(const DecoderConfig& settings, const Vulka
 
 VkVideoProfileInfoKHR VulkanVideoProcessor::GetVkProfile() const
 {
-
-    VkVideoProfileInfoKHR videoProfile({VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR, NULL,
+    VkVideoProfileInfoKHR videoProfile {VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR, NULL,
                                         m_videoStreamDemuxer->GetVideoCodec(),
                                         m_videoStreamDemuxer->GetChromaSubsampling(),
                                         m_videoStreamDemuxer->GetLumaBitDepth(),
-                                        m_videoStreamDemuxer->GetChromaBitDepth()});
+                                        m_videoStreamDemuxer->GetChromaBitDepth()};
 
     return videoProfile;
 }
@@ -229,10 +230,10 @@ VkFormat VulkanVideoProcessor::GetFrameImageFormat()  const
 
 VkExtent3D VulkanVideoProcessor::GetVideoExtent() const
 {
-    VkExtent3D extent ({ (uint32_t)m_videoStreamDemuxer->GetWidth(),
-                         (uint32_t)m_videoStreamDemuxer->GetHeight(),
-                         (uint32_t)1
-                       });
+    VkExtent3D extent { (uint32_t)m_videoStreamDemuxer->GetWidth(),
+                        (uint32_t)m_videoStreamDemuxer->GetHeight(),
+                        (uint32_t)1
+                      };
     return extent;
 }
 
diff --git a/common/libs/VkCodecUtils/VulkanVideoProcessor.h b/common/libs/VkCodecUtils/VulkanVideoProcessor.h
index cbdca1f1..0eb08e9c 100644
--- a/common/libs/VkCodecUtils/VulkanVideoProcessor.h
+++ b/common/libs/VkCodecUtils/VulkanVideoProcessor.h
@@ -23,6 +23,10 @@
 #include "VkCodecUtils/VkVideoQueue.h"
 #include "VkVideoFrameOutput.h"
 
+// Forward declarations
+class VulkanDeviceContext;
+struct VkMpFormatInfo;
+
 class VulkanVideoProcessor : public VkVideoQueue<VulkanDecodedFrame> {
 public:
 
diff --git a/common/libs/VkShell/Shell.h b/common/libs/VkShell/Shell.h
index c9c6c233..b91223b0 100644
--- a/common/libs/VkShell/Shell.h
+++ b/common/libs/VkShell/Shell.h
@@ -66,7 +66,10 @@ class Shell : public VkWsiDisplay, public VkVideoRefCountBase {
         if ((res != VK_SUCCESS) && (res != VK_SUBOPTIMAL_KHR)) {
             std::stringstream ss;
             ss << "VkResult " << res << " returned";
+#ifdef __cpp_exceptions
             throw std::runtime_error(ss.str());
+#endif // __cpp_exceptions
+
         }
 
         return res;
diff --git a/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt b/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt
index 30e3e4cd..5ebba8a3 100644
--- a/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt
+++ b/vk_video_decoder/demos/vk-video-dec/CMakeLists.txt
@@ -50,6 +50,7 @@ set(sources
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanCommandBufferPool.cpp
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanCommandBufferPool.h
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VkVideoFrameToFile.cpp
+    ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp
diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp
index bc65f33f..c401eec1 100644
--- a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp
+++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1Decoder.cpp
@@ -1132,7 +1132,7 @@ static uint32_t tile_log2(int blk_size, int target)
     return k;
 }
 
-uint32_t FloorLog2(uint32_t x)
+static uint32_t FloorLog2(uint32_t x)
 {
     int s = 0;
 
@@ -2289,7 +2289,11 @@ bool VulkanAV1Decoder::ParseObuTileGroup(const AV1ObuHeader& hdr)
             consumedBytes += tile_size_bytes_minus_1 + 1;
             m_PicData.tileOffsets[m_PicData.khr_info.tileCount] = (uint32_t)m_nalu.start_offset + (uint32_t)consumedBytes;
 
-            tileSize = tile_size_minus_1 + 1;
+            // Add bounds checking and safe conversion
+            if (tile_size_minus_1 > (SIZE_MAX - 1)) {
+                return false; // Tile size too large
+            }
+            tileSize = (size_t)(tile_size_minus_1 + 1);
             consumedBytes += (uint32_t)tileSize;
 
             skip_bits((uint32_t)(tileSize * 8));
@@ -2302,7 +2306,7 @@ bool VulkanAV1Decoder::ParseObuTileGroup(const AV1ObuHeader& hdr)
     return (tg_end == num_tiles - 1);
 }
 
-bool IsObuInCurrentOperatingPoint(int  current_operating_point, AV1ObuHeader *hdr) {
+static bool IsObuInCurrentOperatingPoint(int  current_operating_point, AV1ObuHeader *hdr) {
     if (current_operating_point == 0) return true;
     if (((current_operating_point >> hdr->temporal_id) & 0x1) &&
         ((current_operating_point >> (hdr->spatial_id + 8)) & 0x1)) {
diff --git a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp
index e5a35316..37691fe5 100644
--- a/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp
+++ b/vk_video_decoder/libs/NvVideoParser/src/VulkanAV1GlobalMotionDec.cpp
@@ -82,7 +82,7 @@
 #define WARP_PARAM_REDUCE_BITS 6
 #define WARPEDMODEL_PREC_BITS 16
 
-int get_msb(unsigned int n)
+static int get_msb(unsigned int n)
 {
     int log = 0;
     unsigned int value = n;
diff --git a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
index 2b9f6b66..c855386a 100644
--- a/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
+++ b/vk_video_decoder/libs/VkVideoDecoder/VkVideoDecoder.cpp
@@ -334,16 +334,18 @@ int32_t VkVideoDecoder::StartVideoSequence(VkParserDetectedVideoFormat* pVideoFo
 
         if (needNewFilter) {
             result = VulkanFilterYuvCompute::Create(m_vkDevCtx,
-                                                    m_vkDevCtx->GetComputeQueueFamilyIdx(),
-                                                    0,
-                                                    m_filterType,
-                                                    numDecodeSurfaces + 1,
-                                                    inputFormat,
-                                                    outputFormat,
-                                                    &ycbcrConversionCreateInfo,
-                                                    &ycbcrPrimariesConstants,
-                                                    &samplerInfo,
-                                                    m_yuvFilter);
+                                                m_vkDevCtx->GetComputeQueueFamilyIdx(),
+                                                0,
+                                                m_filterType,
+                                                numDecodeSurfaces + 1,
+                                                inputFormat,
+                                                outputFormat,
+                                                false, // inputEnableMsbToLsbShift
+                                                false, // outputEnableLsbToMsbShift
+                                                &ycbcrConversionCreateInfo,
+                                                &ycbcrPrimariesConstants,
+                                                &samplerInfo,
+                                                m_yuvFilter);
         }
         if (result == VK_SUCCESS) {
 
diff --git a/vk_video_decoder/src/vulkan_video_decoder.cpp b/vk_video_decoder/src/vulkan_video_decoder.cpp
index 1d0e0541..f98f3f82 100644
--- a/vk_video_decoder/src/vulkan_video_decoder.cpp
+++ b/vk_video_decoder/src/vulkan_video_decoder.cpp
@@ -66,10 +66,11 @@ class VulkanVideoDecoderImpl : public VulkanVideoDecoder {
 
     virtual VkExtent3D GetVideoExtent() const
     {
-        VkExtent3D extent ({ (uint32_t)m_vulkanVideoProcessor->GetWidth(),
-                             (uint32_t)m_vulkanVideoProcessor->GetHeight(),
-                             (uint32_t)1
-                           });
+        VkExtent3D extent {
+            (uint32_t)m_vulkanVideoProcessor->GetWidth(),
+            (uint32_t)m_vulkanVideoProcessor->GetHeight(),
+            1
+        };
         return extent;
     }
 
diff --git a/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt b/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt
index 7f10d58f..084a6676 100644
--- a/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt
+++ b/vk_video_decoder/test/vulkan-video-dec/CMakeLists.txt
@@ -30,6 +30,7 @@ set(VULKAN_VIDEO_DEC_SOURCES
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanSamplerYcbcrConversion.h
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VkVideoFrameToFile.cpp
+    ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp
diff --git a/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt b/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt
index 30cf00be..d533f95e 100644
--- a/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt
+++ b/vk_video_decoder/test/vulkan-video-simple-dec/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(VULKAN_VIDEO_SIMPLE_DEC_SOURCES
     Main.cpp
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp
+    ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/VideoStreamDemuxer.h
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VkDecoderUtils/ElementaryStream.cpp
diff --git a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt
index d3bba268..b043412b 100644
--- a/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt
+++ b/vk_video_encoder/demos/vk-video-enc/CMakeLists.txt
@@ -80,6 +80,7 @@ set(sources
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/nvVkFormats.cpp
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanBistreamBufferImpl.h
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanBistreamBufferImpl.cpp
+    ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/crcgenerator.cpp
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.h
     ${VK_VIDEO_DECODER_LIBS_SOURCE_ROOT}/VulkanVideoFrameBuffer/VulkanVideoFrameBuffer.cpp
     )
@@ -118,6 +119,10 @@ if(TARGET vulkan)
     list(APPEND definitions PRIVATE -DUNINSTALLED_LOADER="$<TARGET_FILE:vulkan>")
 endif()
 
+if(USE_ENCODER_SHADERC)
+    list(APPEND definitions PRIVATE -DSHADERC_SUPPORT)
+endif()
+
 if(WIN32)
     list(APPEND definitions PRIVATE -DVK_USE_PLATFORM_WIN32_KHR)
     list(APPEND definitions PRIVATE -DWIN32_LEAN_AND_MEAN)
diff --git a/vk_video_encoder/demos/vk-video-enc/Main.cpp b/vk_video_encoder/demos/vk-video-enc/Main.cpp
index 31d24b2d..bb849f72 100644
--- a/vk_video_encoder/demos/vk-video-enc/Main.cpp
+++ b/vk_video_encoder/demos/vk-video-enc/Main.cpp
@@ -21,7 +21,7 @@
 #include "VkCodecUtils/VulkanEncoderFrameProcessor.h"
 #include "VkShell/Shell.h"
 
-int main(int argc, char** argv)
+int main(int argc, const char* argv[])
 {
     VkSharedBaseObj<EncoderConfig> encoderConfig;
     if (VK_SUCCESS != EncoderConfig::CreateCodecConfig(argc, argv, encoderConfig)) {
diff --git a/vk_video_encoder/include/vulkan_video_encoder.h b/vk_video_encoder/include/vulkan_video_encoder.h
index e757f238..f170fd4a 100644
--- a/vk_video_encoder/include/vulkan_video_encoder.h
+++ b/vk_video_encoder/include/vulkan_video_encoder.h
@@ -43,7 +43,7 @@
 class VulkanVideoEncoder : public virtual VkVideoRefCountBase {
 public:
     virtual VkResult Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
-                                int argc, char** argv) = 0;
+                                int argc, const char** argv) = 0;
     virtual int64_t  GetNumberOfFrames() = 0;
     virtual VkResult EncodeNextFrame(int64_t& frameNumEncoded) = 0;
     virtual VkResult GetBitstream() = 0;
@@ -52,7 +52,7 @@ class VulkanVideoEncoder : public virtual VkVideoRefCountBase {
 
 extern "C" VK_VIDEO_ENCODER_EXPORT
 VkResult CreateVulkanVideoEncoder(VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
-                                  int argc, char** argv,
+                                  int argc, const char** argv,
                                   VkSharedBaseObj<VulkanVideoEncoder>& vulkanVideoEncoder);
 
 #endif /* _VULKAN_VIDEO_ENCODER_H_ */
diff --git a/vk_video_encoder/libs/CMakeLists.txt b/vk_video_encoder/libs/CMakeLists.txt
index 5cca8809..66685d33 100644
--- a/vk_video_encoder/libs/CMakeLists.txt
+++ b/vk_video_encoder/libs/CMakeLists.txt
@@ -88,7 +88,7 @@ set(LIBVKVIDEOENCODER_DEFINITIONS
     PRIVATE VK_VIDEO_ENCODER_IMPLEMENTATION
     PUBLIC VK_VIDEO_ENCODER_SHAREDLIB)
 
-if(USE_SHADERC)
+if(USE_ENCODER_SHADERC)
 list(APPEND LIBVKVIDEOENCODER_SRC
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanShaderCompiler.cpp
     ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT}/VkCodecUtils/VulkanShaderCompiler.h
@@ -108,7 +108,7 @@ include_directories(BEFORE ${VK_VIDEO_COMMON_LIBS_SOURCE_ROOT})
 set(LIBVKVIDEOENCODER_DEPENDENCIES GenerateDispatchTables ${VULKAN_VIDEO_PARSER_LIB})
 add_library(${VULKAN_VIDEO_ENCODER_LIB} SHARED ${LIBVKVIDEOENCODER_SRC})
 
-if(USE_SHADERC)
+if(USE_ENCODER_SHADERC)
     # Link the libraries
     target_link_libraries(${VULKAN_VIDEO_ENCODER_LIB} PUBLIC ${SHADERC_SHARED_LIBRARY})
     # Ensure the library depends on the generation of these files
@@ -137,7 +137,7 @@ if(WIN32)
 endif()
 
 add_library(${VULKAN_VIDEO_ENCODER_STATIC_LIB} STATIC ${LIBVKVIDEOENCODER_SRC})
-if(USE_SHADERC)
+if(USE_ENCODER_SHADERC)
     # Link the libraries
     target_link_libraries(${VULKAN_VIDEO_ENCODER_STATIC_LIB} PUBLIC ${SHADERC_SHARED_LIBRARY})
 endif()
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp
index 53d7cec3..fdfe92de 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.cpp
@@ -19,7 +19,7 @@
 #include "VkVideoEncoder/VkEncoderConfigH265.h"
 #include "VkVideoEncoder/VkEncoderConfigAV1.h"
 
-void printHelp(VkVideoCodecOperationFlagBitsKHR codec)
+static void printHelp(VkVideoCodecOperationFlagBitsKHR codec)
 {
     fprintf(stderr,
     "Version: " VKVS_VERSION_STRING "\n"\
@@ -156,10 +156,10 @@ void printHelp(VkVideoCodecOperationFlagBitsKHR codec)
         }
 }
 
-int EncoderConfig::ParseArguments(int argc, char *argv[])
+int EncoderConfig::ParseArguments(int argc, const char *argv[])
 {
     int argcount = 0;
-    std::vector<char*> arglist;
+    std::vector<const char*> arglist;
     std::vector<std::string> args(argv, argv + argc);
     uint32_t frameCount = 0;
 
@@ -572,7 +572,7 @@ int EncoderConfig::ParseArguments(int argc, char *argv[])
             gopStructure.SetIntraRefreshSkippedStartIndex(intraRefreshSkippedStartIndex);
         } else {
             argcount++;
-            arglist.push_back((char*)args[i].c_str());
+            arglist.push_back(args[i].c_str());
         }
     }
 
@@ -703,7 +703,7 @@ int EncoderConfig::ParseArguments(int argc, char *argv[])
     return DoParseArguments(argcount, arglist.data());
 }
 
-VkResult EncoderConfig::CreateCodecConfig(int argc, char *argv[],
+VkResult EncoderConfig::CreateCodecConfig(int argc, const char *argv[],
                                           VkSharedBaseObj<EncoderConfig>& encoderConfig)
 {
 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
index 94adb438..896c1636 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfig.h
@@ -913,13 +913,13 @@ struct EncoderConfig : public VkVideoRefCountBase {
     }
 
     // Factory Function
-    static VkResult CreateCodecConfig(int argc, char *argv[], VkSharedBaseObj<EncoderConfig>& encoderConfig);
+    static VkResult CreateCodecConfig(int argc, const char *argv[], VkSharedBaseObj<EncoderConfig>& encoderConfig);
 
     void InitVideoProfile();
 
-    int ParseArguments(int argc, char *argv[]);
+    int ParseArguments(int argc, const char *argv[]);
 
-    virtual int DoParseArguments(int argc, char *argv[]) {
+    virtual int DoParseArguments(int argc, const char *argv[]) {
         if (argc > 0) {
             std::cout << "Invalid paramters: ";
             for (int i = 0; i < argc; i++) {
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp
index aeab421d..c3ba67c1 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.cpp
@@ -26,7 +26,7 @@
     }                                                           \
 }
 
-int EncoderConfigAV1::DoParseArguments(int argc, char* argv[])
+int EncoderConfigAV1::DoParseArguments(int argc, const char* argv[])
 {
     // No validation of command line options.  So, all options must be valid and
     // values with in the limits of vulkan and av1 specification
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h
index 0838e2c8..622977d6 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigAV1.h
@@ -88,7 +88,7 @@ struct EncoderConfigAV1 : public EncoderConfig {
     }
     virtual ~EncoderConfigAV1() {}
 
-    virtual int DoParseArguments(int argc, char* argv[]) override;
+    virtual int DoParseArguments(int argc, const char* argv[]) override;
 
     virtual VkResult InitializeParameters() override
     {
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp
index 68829578..e9c94bed 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.cpp
@@ -17,7 +17,7 @@
 #include "VkVideoEncoder/VkEncoderConfigH264.h"
 #include "VkVideoEncoder/VkVideoEncoderH264.h"
 
-int EncoderConfigH264::DoParseArguments(int argc, char* argv[])
+int EncoderConfigH264::DoParseArguments(int argc, const char* argv[])
 {
     std::vector<std::string> args(argv, argv + argc);
 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h
index fb1c0611..6d8865a5 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH264.h
@@ -156,7 +156,7 @@ struct EncoderConfigH264 : public EncoderConfig {
     const LevelLimits* levelLimits;
     size_t levelLimitsSize;
 
-    virtual int DoParseArguments(int argc, char* argv[]) override;
+    virtual int DoParseArguments(int argc, const char* argv[]) override;
 
     StdVideoH264LevelIdc DetermineLevel(uint8_t dpbSize,
                                         uint32_t bitrate,
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp
index b4a03ce1..33bcc53e 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.cpp
@@ -68,7 +68,7 @@ uint32_t EncoderConfigH265::GetCpbVclFactor()
     return baseFactor + depthFactor;
 }
 
-int EncoderConfigH265::DoParseArguments(int argc, char* argv[])
+int EncoderConfigH265::DoParseArguments(int argc, const char* argv[])
 {
     std::vector<std::string> args(argv, argv + argc);
 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h
index ebc5ca38..774bf1a9 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderConfigH265.h
@@ -135,7 +135,7 @@ struct EncoderConfigH265 : public EncoderConfig {
         return this;
     }
 
-    virtual int DoParseArguments(int argc, char* argv[]) override;
+    virtual int DoParseArguments(int argc, const char* argv[]) override;
 
     uint32_t GetCtbAlignedPicSizeInSamples(uint32_t& picWidthInCtbsY, uint32_t& picHeightInCtbsY, bool minCtbsY = false);
 
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h
index a54bed3c..c828c3c7 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkEncoderDpbH264.h
@@ -139,7 +139,7 @@ class VkEncDpbH264
     const StdVideoEncodeH264PictureInfo *GetCurrentDpbEntry(void)
     {
         assert((m_currDpbIdx < m_max_dpb_size) || (m_currDpbIdx == MAX_DPB_SLOTS));
-        return &m_DPB[m_currDpbIdx].picInfo;
+        return &m_DPB[(int)m_currDpbIdx].picInfo;
     }
 
     uint32_t GetUpdatedFrameNumAndPicOrderCnt(int32_t& PicOrderCnt)
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
index 8649df07..84d96deb 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.cpp
@@ -164,96 +164,100 @@ VkResult VkVideoEncoder::LoadNextFrame(VkSharedBaseObj<VkVideoEncodeFrameInfo>&
 
     const uint8_t* pInputFrameData = m_encoderConfig->inputFileHandler.GetMappedPtr(m_encoderConfig->input.fullImageSize, encodeFrameInfo->frameInputOrderNum);
 
+    // NOTE: Get image layout
     const VkSubresourceLayout* dstSubresourceLayout = dstImageResource->GetSubresourceLayout();
 
-    int yCbCrConvResult = 0;
-    if (m_encoderConfig->input.bpp == 8) {
-
-        if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) {
-            // Load current 8-bit frame from file and convert to 2-plane YUV444
-            yCbCrConvResult = YCbCrConvUtilsCpu<uint8_t>::I444ToP444(
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset,         // src_y
-                    (int)m_encoderConfig->input.planeLayouts[0].rowPitch,                    // src_stride_y
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset,         // src_u
-                    (int)m_encoderConfig->input.planeLayouts[1].rowPitch,                    // src_stride_u
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset,         // src_v
-                    (int)m_encoderConfig->input.planeLayouts[2].rowPitch,                    // src_stride_v
-                    writeImagePtr + dstSubresourceLayout[0].offset,                          // dst_y
-                    (int)dstSubresourceLayout[0].rowPitch,                                   // dst_stride_y
-                    writeImagePtr + dstSubresourceLayout[1].offset,                          // dst_uv
-                    (int)dstSubresourceLayout[1].rowPitch,                                   // dst_stride_uv
-                    std::min(m_encoderConfig->encodeWidth,  m_encoderConfig->input.width),   // width
-                    std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height)); // height
-        } else {
-            // Load current 8-bit frame from file and convert to NV12
-            yCbCrConvResult = YCbCrConvUtilsCpu<uint8_t>::I420ToNV12(
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset,         // src_y,
-                    (int)m_encoderConfig->input.planeLayouts[0].rowPitch,                    // src_stride_y,
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset,         // src_u,
-                    (int)m_encoderConfig->input.planeLayouts[1].rowPitch,                    // src_stride_u,
-                    pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset,         // src_v,
-                    (int)m_encoderConfig->input.planeLayouts[2].rowPitch,                    // src_stride_v,
-                    writeImagePtr + dstSubresourceLayout[0].offset,                          // dst_y,
-                    (int)dstSubresourceLayout[0].rowPitch,                                   // dst_stride_y,
-                    writeImagePtr + dstSubresourceLayout[1].offset,                          // dst_uv,
-                    (int)dstSubresourceLayout[1].rowPitch,                                   // dst_stride_uv,
-                    std::min(m_encoderConfig->encodeWidth,  m_encoderConfig->input.width),   // width
-                    std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height)); // height
-        }
-
-    } else if (m_encoderConfig->input.bpp == 10) { // 10-bit - actually 16-bit only for now.
+    const uint32_t width  = std::min(m_encoderConfig->encodeWidth,  m_encoderConfig->input.width);
+    const uint32_t height = std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height);
+
+    if (m_inputComputeFilter != nullptr) {
+        // Compute filter available: direct plane copy, GPU filter handles conversion
+        CopyYCbCrPlanesDirectCPU(
+                pInputFrameData,                                           // Source buffer
+                m_encoderConfig->input.planeLayouts,                       // Source layouts
+                writeImagePtr,                                             // Destination buffer
+                dstSubresourceLayout,                                      // Destination layouts
+                width, height,
+                m_encoderConfig->input.numPlanes,                          // Number of planes
+                m_encoderConfig->input.vkFormat);                          // Format for subsampling detection
+    } else {
+        // No compute filter: CPU conversion from 3-plane to 2-plane format
+        int yCbCrConvResult = 0;
+        if (m_encoderConfig->input.bpp == 8) {
+            if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) {
+                yCbCrConvResult = YCbCrConvUtilsCpu<uint8_t>::I444ToP444(
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset,
+                        (int)m_encoderConfig->input.planeLayouts[0].rowPitch,
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset,
+                        (int)m_encoderConfig->input.planeLayouts[1].rowPitch,
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset,
+                        (int)m_encoderConfig->input.planeLayouts[2].rowPitch,
+                        writeImagePtr + dstSubresourceLayout[0].offset,
+                        (int)dstSubresourceLayout[0].rowPitch,
+                        writeImagePtr + dstSubresourceLayout[1].offset,
+                        (int)dstSubresourceLayout[1].rowPitch,
+                        width, height);
+            } else {
+                yCbCrConvResult = YCbCrConvUtilsCpu<uint8_t>::I420ToNV12(
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset,
+                        (int)m_encoderConfig->input.planeLayouts[0].rowPitch,
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset,
+                        (int)m_encoderConfig->input.planeLayouts[1].rowPitch,
+                        pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset,
+                        (int)m_encoderConfig->input.planeLayouts[2].rowPitch,
+                        writeImagePtr + dstSubresourceLayout[0].offset,
+                        (int)dstSubresourceLayout[0].rowPitch,
+                        writeImagePtr + dstSubresourceLayout[1].offset,
+                        (int)dstSubresourceLayout[1].rowPitch,
+                        width, height);
+            }
+        } else if (m_encoderConfig->input.bpp == 10 || m_encoderConfig->input.bpp == 12) {
+            int shiftBits = 0;
+            if (m_encoderConfig->input.msbShift >= 0) {
+                shiftBits = m_encoderConfig->input.msbShift;
+            } else {
+                shiftBits = 16 - m_encoderConfig->input.bpp;
+            }
 
-        int shiftBits = 0;
-        if (m_encoderConfig->input.msbShift >= 0) {
-            shiftBits = m_encoderConfig->input.msbShift;
+            if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) {
+                yCbCrConvResult = YCbCrConvUtilsCpu<uint16_t>::I444ToP444(
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset),
+                        (int)m_encoderConfig->input.planeLayouts[0].rowPitch,
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset),
+                        (int)m_encoderConfig->input.planeLayouts[1].rowPitch,
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset),
+                        (int)m_encoderConfig->input.planeLayouts[2].rowPitch,
+                        (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset),
+                        (int)dstSubresourceLayout[0].rowPitch,
+                        (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset),
+                        (int)dstSubresourceLayout[1].rowPitch,
+                        width, height, shiftBits);
+            } else {
+                yCbCrConvResult = YCbCrConvUtilsCpu<uint16_t>::I420ToNV12(
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset),
+                        (int)m_encoderConfig->input.planeLayouts[0].rowPitch,
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset),
+                        (int)m_encoderConfig->input.planeLayouts[1].rowPitch,
+                        (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset),
+                        (int)m_encoderConfig->input.planeLayouts[2].rowPitch,
+                        (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset),
+                        (int)dstSubresourceLayout[0].rowPitch,
+                        (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset),
+                        (int)dstSubresourceLayout[1].rowPitch,
+                        width, height, shiftBits);
+            }
         } else {
-            shiftBits = 16 - m_encoderConfig->input.bpp;
+            assert(!"Requested bit-depth is not supported!");
+            return VK_ERROR_INITIALIZATION_FAILED;
         }
 
-        if (m_encoderConfig->encodeChromaSubsampling == VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR) {
-            // Load current 10-bit frame from file and convert to 2-plane YUV444
-            yCbCrConvResult = YCbCrConvUtilsCpu<uint16_t>::I444ToP444(
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), // src_y
-                    (int)m_encoderConfig->input.planeLayouts[0].rowPitch,                               // src_stride_y
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), // src_u
-                    (int)m_encoderConfig->input.planeLayouts[1].rowPitch,                               // src_stride_u
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), // src_v
-                    (int)m_encoderConfig->input.planeLayouts[2].rowPitch,                               // src_stride_v
-                    (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset),                        // dst_y
-                    (int)dstSubresourceLayout[0].rowPitch,                                              // dst_stride_y
-                    (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset),                        // dst_uv
-                    (int)dstSubresourceLayout[1].rowPitch,                                              // dst_stride_uv
-                    std::min(m_encoderConfig->encodeWidth,  m_encoderConfig->input.width),              // width
-                    std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height),             // height
-                    shiftBits);
-        } else {
-            // Load current 10-bit frame from file and convert to P010/P016
-            yCbCrConvResult = YCbCrConvUtilsCpu<uint16_t>::I420ToNV12(
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[0].offset), // src_y,
-                    (int)m_encoderConfig->input.planeLayouts[0].rowPitch,                               // src_stride_y,
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[1].offset), // src_u,
-                    (int)m_encoderConfig->input.planeLayouts[1].rowPitch,                               // src_stride_u,
-                    (const uint16_t*)(pInputFrameData + m_encoderConfig->input.planeLayouts[2].offset), // src_v,
-                    (int)m_encoderConfig->input.planeLayouts[2].rowPitch,                               // src_stride_v,
-                    (uint16_t*)(writeImagePtr + dstSubresourceLayout[0].offset),                        // dst_y,
-                    (int)dstSubresourceLayout[0].rowPitch,                                              // dst_stride_y,
-                    (uint16_t*)(writeImagePtr + dstSubresourceLayout[1].offset),                        // dst_uv,
-                    (int)dstSubresourceLayout[1].rowPitch,                                              // dst_stride_uv,
-                    std::min(m_encoderConfig->encodeWidth,  m_encoderConfig->input.width),              // width
-                    std::min(m_encoderConfig->encodeHeight, m_encoderConfig->input.height),             // height
-                    shiftBits);
+        if (yCbCrConvResult != 0) {
+            return VK_ERROR_INITIALIZATION_FAILED;
         }
-
-    } else {
-        assert(!"Requested bit-depth is not supported!");
-    }
-
-    if (yCbCrConvResult == 0) {
-        // On success, stage the input frame for the encoder video input
-        return StageInputFrame(encodeFrameInfo);
     }
 
-    return VK_ERROR_INITIALIZATION_FAILED;
+    // Now stage the input frame for the encoder video input
+    return StageInputFrame(encodeFrameInfo);
 }
 
 VkResult VkVideoEncoder::StageInputFrameQpMap(VkSharedBaseObj<VkVideoEncodeFrameInfo>& encodeFrameInfo,
@@ -479,6 +483,123 @@ VkResult VkVideoEncoder::SubmitStagedQpMap(VkSharedBaseObj<VkVideoEncodeFrameInf
     return result;
 }
 
+/**
+ * @brief Copies YCbCr planes directly from input buffer to output buffer when formats are the same
+ *
+ * This function efficiently copies YCbCr data between buffers when the number of planes
+ * and bit depth are identical, but potentially with different pitch values. It handles
+ * 1, 2, or 3 plane formats and supports 8-bit and high bit-depth formats (10, 12, 16 bit).
+ * Chroma planes are sized from the format subsampling (4:4:4, 4:2:2, 4:2:0); plane 1 of a 2-plane format is copied as interleaved CbCr.
+ *
+ * @param pInputFrameData Source buffer containing YCbCr planes
+ * @param inputPlaneLayouts Array of source buffer plane layouts (offset, pitch, etc.)
+ * @param writeImagePtr Destination buffer for the YCbCr planes
+ * @param dstSubresourceLayout Array of destination buffer plane layouts
+ * @param width Width of the image in pixels
+ * @param height Height of the image in pixels
+ * @param numPlanes Number of planes in the format (1, 2, or 3)
+ * @param format The VkFormat of the image for proper subsampling and bit depth detection
+ */
+void VkVideoEncoder::CopyYCbCrPlanesDirectCPU(
+    const uint8_t* pInputFrameData,
+    const VkSubresourceLayout* inputPlaneLayouts,
+    uint8_t* writeImagePtr,
+    const VkSubresourceLayout* dstSubresourceLayout,
+    uint32_t width,
+    uint32_t height,
+    uint32_t numPlanes,
+    VkFormat format)
+{
+    // Look up the multi-planar description of the format; may be nullptr, in which case defaults are used below
+    const VkMpFormatInfo* formatInfo = YcbcrVkFormatInfo(format);
+
+    // Determine bit depth and bytes per sample from format (samples wider than 8 bits occupy 2 bytes)
+    const uint32_t bitDepth = (formatInfo != nullptr) ? GetBitsPerChannel(formatInfo->planesLayout) : 8; // Default to 8-bit
+    const uint32_t bytesPerPixel = (bitDepth > 8) ? 2 : 1;
+
+    // Determine chroma subsampling ratios (1 = not subsampled, 2 = halved in that direction)
+    const uint32_t chromaHorzRatio = (formatInfo != nullptr) ? (1 << formatInfo->planesLayout.secondaryPlaneSubsampledX) : 1;
+    const uint32_t chromaVertRatio = (formatInfo != nullptr) ? (1 << formatInfo->planesLayout.secondaryPlaneSubsampledY) : 1;
+
+    // Log the format subsampling for debugging
+    if (m_encoderConfig->verbose) {
+        const char* subsamplingDesc = "4:4:4";
+        if (chromaHorzRatio == 2 && chromaVertRatio == 2) {
+            subsamplingDesc = "4:2:0";
+        } else if (chromaHorzRatio == 2 && chromaVertRatio == 1) {
+            subsamplingDesc = "4:2:2";
+        }
+        printf("YCbCr copy with %s subsampling (chromaHorzRatio=%u, chromaVertRatio=%u), %u-bit\n",
+               subsamplingDesc, chromaHorzRatio, chromaVertRatio, bitDepth);
+    }
+
+    // Handle all planes
+    for (uint32_t plane = 0; plane < numPlanes; plane++) {
+        // Source and destination plane pointers
+        const uint8_t* srcPlane = pInputFrameData + inputPlaneLayouts[plane].offset;
+        uint8_t* dstPlane = writeImagePtr + dstSubresourceLayout[plane].offset;
+
+        // Get plane dimensions - adjust for chroma planes
+        uint32_t planeWidth = width;
+        uint32_t planeHeight = height;
+
+        // Adjust dimensions for chroma planes based on format subsampling (rounding up for odd sizes)
+        if (plane > 0) {
+            if (chromaHorzRatio > 1) {
+                planeWidth = (width + chromaHorzRatio - 1) / chromaHorzRatio;
+            }
+            if (chromaVertRatio > 1) {
+                planeHeight = (height + chromaVertRatio - 1) / chromaVertRatio;
+            }
+        }
+
+        // Source and destination strides
+        assert(inputPlaneLayouts[plane].rowPitch <= SIZE_MAX);
+        assert(dstSubresourceLayout[plane].rowPitch <= SIZE_MAX);
+        const size_t srcStride = (size_t)inputPlaneLayouts[plane].rowPitch;
+        const size_t dstStride = (size_t)dstSubresourceLayout[plane].rowPitch;
+
+        // Line width in bytes (size_t math); plane 1 of a 2-plane format holds interleaved CbCr, i.e. two samples per position
+        const size_t lineBytes = (size_t)planeWidth * bytesPerPixel * (((numPlanes == 2) && (plane == 1)) ? 2u : 1u);
+
+        // Get the starting pointers for this plane
+        const uint8_t* srcRow = srcPlane;
+        uint8_t* dstRow = dstPlane;
+
+        if (false && (bitDepth > 8)) {
+            // Disabled (never taken): per-sample LSB-to-MSB shift path; the raw row copy below is used for every bit depth
+            const int shiftBits = 16 - bitDepth;
+
+            // Copy each line, incrementing pointers by stride amounts
+            for (uint32_t y = 0; y < planeHeight; y++) {
+
+                // Get the starting pointers for this row
+                const uint16_t* srcRow16 = (const uint16_t*)srcRow;
+                uint16_t* dstRow16 = (uint16_t*)dstRow;
+
+                for (uint32_t i = 0; i < planeWidth; i++) {
+                    *dstRow16++ = (*srcRow16++ << shiftBits);
+                }
+
+                // Advance to the next line using pointer arithmetic
+                srcRow += srcStride;
+                dstRow += dstStride;
+            }
+
+        } else {
+
+            // Copy each line, incrementing pointers by stride amounts
+            for (uint32_t y = 0; y < planeHeight; y++) {
+                // Copy only the visible bytes of the line; row padding is skipped via the strides
+                memcpy(dstRow, srcRow, lineBytes);
+
+                // Advance to the next line using pointer arithmetic
+                srcRow += srcStride;
+                dstRow += dstStride;
+            }
+        }
+    }
+}
 
 VkResult VkVideoEncoder::SubmitStagedInputFrame(VkSharedBaseObj<VkVideoEncodeFrameInfo>& encodeFrameInfo)
 {
@@ -943,6 +1064,7 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj<EncoderConfig>& encoderConf
                                              VK_IMAGE_USAGE_TRANSFER_DST_BIT);
     const VkImageUsageFlags dpbImageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR;
 
+    // NOTE: Create linearInputImage
     result =  VulkanVideoImagePool::Create(m_vkDevCtx, m_linearInputImagePool);
     if(result != VK_SUCCESS) {
         fprintf(stderr, "\nInitEncoder Error: Failed to create linearInputImagePool.\n");
@@ -954,9 +1076,21 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj<EncoderConfig>& encoderConf
         std::max(m_maxCodedExtent.height, encoderConfig->input.height)
     };
 
+    // When compute filter is available, the linear image stores raw input format
+    // and the filter handles conversion. Without it, the linear image must match
+    // the encode source format since CopyLinearToOptimalImage does no conversion.
+    const VkFormat linearImageFormat =
+#ifdef SHADERC_SUPPORT
+        encoderConfig->enablePreprocessComputeFilter
+            ? encoderConfig->input.vkFormat
+            : m_imageInFormat;
+#else
+        m_imageInFormat;
+#endif
+
     result = m_linearInputImagePool->Configure( m_vkDevCtx,
                                                 encoderConfig->numInputImages,
-                                                m_imageInFormat,
+                                                linearImageFormat,
                                                 linearInputImageExtent,
                                                   ( VK_IMAGE_USAGE_SAMPLED_BIT |
                                                     VK_IMAGE_USAGE_STORAGE_BIT |
@@ -1217,8 +1351,10 @@ VkResult VkVideoEncoder::InitEncoder(VkSharedBaseObj<EncoderConfig>& encoderConf
                                                 0, // queueIndex
                                                 encoderConfig->filterType,
                                                 encoderConfig->numInputImages,
-                                                m_imageInFormat,  // in filter format (can be RGB)
+                                                encoderConfig->input.vkFormat,  // in filter format (can be RGB)
                                                 m_imageInFormat,  // out filter - same as input for now.
+                                                false, // inputEnableMsbToLsbShift
+                                                (encoderConfig->input.msbShift > 0),
                                                 &ycbcrConversionCreateInfo,
                                                 &ycbcrPrimariesConstants,
                                                 &samplerInfo,
@@ -1405,7 +1541,9 @@ VkImageLayout VkVideoEncoder::TransitionImageLayout(VkCommandBuffer cmdBuf,
         imageBarrier.srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR;
         imageBarrier.dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR;
     } else {
+#ifdef __cpp_exceptions
         throw std::invalid_argument("unsupported layout transition!");
+#endif
     }
 
     const VkDependencyInfoKHR dependencyInfo = {
@@ -1449,8 +1587,9 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer
     // Bind memory for the image.
     const VkMpFormatInfo* mpInfo = YcbcrVkFormatInfo(format);
 
-    // Currently formats that have more than 2 output planes are not supported. 444 formats have a shared CbCr planes in all current tests
-    assert((mpInfo->vkPlaneFormat[2] == VK_FORMAT_UNDEFINED) && (mpInfo->vkPlaneFormat[3] == VK_FORMAT_UNDEFINED));
+    // Determine number of planes: 1 (base) + numberOfExtraPlanes
+    const uint32_t numPlanes = 1 + mpInfo->planesLayout.numberOfExtraPlanes;
+    assert(numPlanes >= 1 && numPlanes <= 3);
 
     // Copy src buffer to image.
     VkImageCopy copyRegion[3]{};
@@ -1485,9 +1624,21 @@ VkResult VkVideoEncoder::CopyLinearToOptimalImage(VkCommandBuffer& commandBuffer
     copyRegion[1].dstSubresource.baseArrayLayer = dstCopyArrayLayer;
     copyRegion[1].dstSubresource.layerCount = 1;
 
+    if (numPlanes > 2) {
+        copyRegion[2].extent = copyRegion[1].extent;
+        copyRegion[2].srcSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_2_BIT;
+        copyRegion[2].srcSubresource.mipLevel = 0;
+        copyRegion[2].srcSubresource.baseArrayLayer = srcCopyArrayLayer;
+        copyRegion[2].srcSubresource.layerCount = 1;
+        copyRegion[2].dstSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_2_BIT;
+        copyRegion[2].dstSubresource.mipLevel = 0;
+        copyRegion[2].dstSubresource.baseArrayLayer = dstCopyArrayLayer;
+        copyRegion[2].dstSubresource.layerCount = 1;
+    }
+
     m_vkDevCtx->CmdCopyImage(commandBuffer, srcImageResource->GetImage(), srcImageLayout,
                              dstImageResource->GetImage(), dstImageLayout,
-                             (uint32_t)2, copyRegion);
+                             numPlanes, copyRegion);
 
     {
         VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER};
@@ -1610,12 +1761,9 @@ VkResult VkVideoEncoder::HandleCtrlCmd(VkSharedBaseObj<VkVideoEncodeFrameInfo>&
         encodeFrameInfo->qualityLevelInfo.sType  = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR;
         encodeFrameInfo->qualityLevelInfo.qualityLevel = encodeFrameInfo->qualityLevel;
         if (pNext != nullptr) {
-            if (encodeFrameInfo->rateControlInfo.pNext == nullptr) {
-                encodeFrameInfo->rateControlInfo.pNext = pNext;
-            } else {
-                ((VkBaseInStructure*)(encodeFrameInfo->rateControlInfo.pNext))->pNext = pNext;
-            }
+            vk::ChainNextVkStruct(encodeFrameInfo->rateControlInfo, *pNext);
         }
+
         pNext = (VkBaseInStructure*)&encodeFrameInfo->qualityLevelInfo;
     }
 
@@ -1638,12 +1786,9 @@ VkResult VkVideoEncoder::HandleCtrlCmd(VkSharedBaseObj<VkVideoEncodeFrameInfo>&
         m_beginRateControlInfo = encodeFrameInfo->rateControlInfo;
 
         if (pNext != nullptr) {
-            if (encodeFrameInfo->rateControlInfo.pNext == nullptr) {
-                encodeFrameInfo->rateControlInfo.pNext = pNext;
-            } else {
-                ((VkBaseInStructure*)(encodeFrameInfo->rateControlInfo.pNext))->pNext = pNext;
-            }
+            vk::ChainNextVkStruct(encodeFrameInfo->rateControlInfo, *pNext);
         }
+
         pNext = (VkBaseInStructure*)&encodeFrameInfo->rateControlInfo;
     }
 
@@ -1723,7 +1868,8 @@ VkResult VkVideoEncoder::RecordVideoCodingCmd(VkSharedBaseObj<VkVideoEncodeFrame
         vkDevCtx->CmdControlVideoCodingKHR(cmdBuf, &renderControlInfo);
 
         m_beginRateControlInfo = *(VkVideoEncodeRateControlInfoKHR*)encodeFrameInfo->pControlCmdChain;
-        ((VkBaseInStructure*)(m_beginRateControlInfo.pNext))->pNext = NULL;
+        // Do not walk the chain, otherwise we end up creating a loop here.
+        m_beginRateControlInfo.pNext = (VkBaseInStructure*)(&encodeFrameInfo->pControlCmdChain);
     }
 
     if (m_videoMaintenance1FeaturesSupported)
@@ -1735,10 +1881,12 @@ VkResult VkVideoEncoder::RecordVideoCodingCmd(VkSharedBaseObj<VkVideoEncodeFrame
         videoInlineQueryInfoKHR.firstQuery = querySlotId;
         videoInlineQueryInfoKHR.queryCount = numQuerySamples;
         VkBaseInStructure* pStruct = (VkBaseInStructure*)&encodeFrameInfo->encodeInfo;
-        while (pStruct->pNext) pStruct = (VkBaseInStructure*)pStruct->pNext;
-        pStruct->pNext = (VkBaseInStructure*)&videoInlineQueryInfoKHR;
+        vk::ChainNextVkStruct(*pStruct, videoInlineQueryInfoKHR);
 
         vkDevCtx->CmdEncodeVideoKHR(cmdBuf, &encodeFrameInfo->encodeInfo);
+
+        // Unlink the stack-allocated videoInlineQueryInfoKHR from the chain again; otherwise GetEncodeFrameInfoH264 later follows a dangling pointer
+        encodeFrameInfo->encodeInfo.pNext = videoInlineQueryInfoKHR.pNext;
     }
     else
     {
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h
index 61c2ec84..dacc2929 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoEncoder.h
@@ -58,7 +58,7 @@ class VkVideoEncoder : public VkVideoRefCountBase {
     {
         VkStructureType GetType() {
             return (encodeInfo.pNext == nullptr) ?
-                    VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR : ((VkBaseInStructure*)encodeInfo.pNext)->sType;
+                    VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR : reinterpret_cast<const VkBaseInStructure*>(encodeInfo.pNext)->sType;
         }
 
         VkVideoEncodeFrameInfo(const void* pNext = nullptr)
@@ -559,6 +559,29 @@ class VkVideoEncoder : public VkVideoRefCountBase {
 
     const uint8_t* setPlaneOffset(const uint8_t* pFrameData, size_t bufferSize, size_t &currentReadOffset);
 
+    /**
+     * @brief Copies YCbCr planes directly from the input buffer to the output buffer when both use the same format
+     *
+     * @param pInputFrameData Source buffer containing YCbCr planes
+     * @param inputPlaneLayouts Array of source buffer plane layouts (offset, pitch, etc.)
+     * @param writeImagePtr Destination buffer for the YCbCr planes
+     * @param dstSubresourceLayout Array of destination buffer plane layouts
+     * @param width Width of the image in pixels
+     * @param height Height of the image in pixels
+     * @param numPlanes Number of planes in the format (1, 2, or 3)
+     * @param format The VkFormat of the image for proper subsampling and bit depth detection
+     * @return none
+     */
+    void CopyYCbCrPlanesDirectCPU(
+        const uint8_t* pInputFrameData,
+        const VkSubresourceLayout* inputPlaneLayouts,
+        uint8_t* writeImagePtr,
+        const VkSubresourceLayout* dstSubresourceLayout,
+        uint32_t width,
+        uint32_t height,
+        uint32_t numPlanes,
+        VkFormat format);
+
     bool WaitForThreadsToComplete();
 
 protected:
diff --git a/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h b/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h
index d3b1ab0a..2ab76bcd 100644
--- a/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h
+++ b/vk_video_encoder/libs/VkVideoEncoder/VkVideoGopStructure.h
@@ -25,6 +25,7 @@
 #include <functional>
 #include <iostream>
 #include <iomanip>
+#include <algorithm>  // for std::min
 
 static const uint32_t MAX_GOP_SIZE = 64;
 
@@ -207,15 +208,15 @@ class VkVideoGopStructure {
 
             uint32_t periodDelta = INT32_MAX; // the delta of this frame to the next closed GOP reference. -1 if it is not a B-frame
             if (framesLeft <= consecutiveBFrameCount) { // Handle last frames sequence
-                periodDelta = std::min(periodDelta, framesLeft);
+                periodDelta = std::min<uint32_t>(periodDelta, framesLeft);
             }
 
             if (m_idrPeriod > 0) { // Is the IDR period valid
-                periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_idrPeriod));
+                periodDelta = std::min<uint32_t>(periodDelta, GetPeriodDelta(gopState, m_idrPeriod));
             }
 
             if (m_closedGop) { // A closed GOP is required.
-                periodDelta = std::min(periodDelta, GetPeriodDelta(gopState, m_gopFrameCount));
+                periodDelta = std::min<uint32_t>(periodDelta, GetPeriodDelta(gopState, m_gopFrameCount));
             }
 
             uint32_t refDelta = INT32_MAX;    // the delta of this frame from the last reference. -1 if it is not a B-frame
diff --git a/vk_video_encoder/src/vulkan_video_encoder.cpp b/vk_video_encoder/src/vulkan_video_encoder.cpp
index 61c3637d..ae44f7ce 100644
--- a/vk_video_encoder/src/vulkan_video_encoder.cpp
+++ b/vk_video_encoder/src/vulkan_video_encoder.cpp
@@ -23,7 +23,7 @@
 class VulkanVideoEncoderImpl : public VulkanVideoEncoder {
 public:
     virtual VkResult Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
-                                int argc, char** argv);
+                                int argc, const char** argv);
     virtual int64_t GetNumberOfFrames()
     {
         return m_encoderConfig->numFrames;
@@ -81,7 +81,7 @@ class VulkanVideoEncoderImpl : public VulkanVideoEncoder {
 };
 
 VkResult VulkanVideoEncoderImpl::Initialize(VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
-                                            int argc, char** argv)
+                                            int argc, const char** argv)
 {
     VkResult result = EncoderConfig::CreateCodecConfig(argc, argv, m_encoderConfig);
     if (VK_SUCCESS != result) {
@@ -235,7 +235,7 @@ VkResult VulkanVideoEncoderImpl::EncodeNextFrame(int64_t& frameNumEncoded)
 
 VK_VIDEO_ENCODER_EXPORT
 VkResult CreateVulkanVideoEncoder(VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
-                                  int argc, char** argv,
+                                  int argc, const char** argv,
                                   VkSharedBaseObj<VulkanVideoEncoder>& vulkanVideoEncoder)
 {
     switch((uint32_t)videoCodecOperation)
diff --git a/vk_video_encoder/test/vulkan-video-enc/Main.cpp b/vk_video_encoder/test/vulkan-video-enc/Main.cpp
index 58c5cb49..09f55420 100644
--- a/vk_video_encoder/test/vulkan-video-enc/Main.cpp
+++ b/vk_video_encoder/test/vulkan-video-enc/Main.cpp
@@ -18,7 +18,7 @@
 #include "vulkan_video_encoder.h"
 #include "VkVSCommon.h"
 
-int main(int argc, char** argv)
+int main(int argc, const char** argv)
 {
     std::cout << "Enter encoder test" << std::endl;
     VkSharedBaseObj<VulkanVideoEncoder> vulkanVideoEncoder;