From 882f13525ba0f4f9758e103d00cf4fc26ed1d175 Mon Sep 17 00:00:00 2001 From: Corey Williams Date: Tue, 6 Jan 2026 14:53:03 -0600 Subject: [PATCH 1/3] restarted PR --- include/nbl/asset/IAsset.h | 1 + include/nbl/asset/ICPUMeshPipeline.h | 145 +++++ .../nbl/builtin/hlsl/indirect_commands.hlsl | 3 + include/nbl/video/IGPUCommandBuffer.h | 24 +- include/nbl/video/IGPUCommandPool.h | 52 +- include/nbl/video/IGPUMeshPipeline.h | 165 +++++ include/nbl/video/ILogicalDevice.h | 34 +- include/nbl/video/asset_traits.h | 76 ++- src/nbl/CMakeLists.txt | 3 +- src/nbl/video/CVulkanCommandBuffer.cpp | 22 +- src/nbl/video/CVulkanCommandBuffer.h | 6 +- src/nbl/video/CVulkanLogicalDevice.cpp | 574 ++++++++++++------ src/nbl/video/CVulkanLogicalDevice.h | 16 +- src/nbl/video/CVulkanMeshPipeline.cpp | 27 + src/nbl/video/CVulkanMeshPipeline.h | 33 + src/nbl/video/CVulkanPhysicalDevice.cpp | 52 ++ src/nbl/video/IGPUCommandBuffer.cpp | 87 ++- src/nbl/video/ILogicalDevice.cpp | 441 ++++++++------ .../device_capabilities/device_features.json | 10 + .../device_capabilities/device_limits.json | 117 ++++ 20 files changed, 1461 insertions(+), 427 deletions(-) create mode 100644 include/nbl/asset/ICPUMeshPipeline.h create mode 100644 include/nbl/video/IGPUMeshPipeline.h create mode 100644 src/nbl/video/CVulkanMeshPipeline.cpp create mode 100644 src/nbl/video/CVulkanMeshPipeline.h diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index 7c6a33193d..bef9b151f6 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -95,6 +95,7 @@ class IAsset : virtual public core::IReferenceCounted ET_PIPELINE_CACHE = 1ull<<21, //!< asset::ICPUPipelineCache ET_SCENE = 1ull<<22, //!< reserved, to implement later ET_RAYTRACING_PIPELINE = 1ull << 23, //!< asset::ICPURayTracingPipeline + ET_MESH_PIPELINE = 1ull << 24, ET_IMPLEMENTATION_SPECIFIC_METADATA = 1ull<<31u, //!< lights, etc. //! 
Reserved special value used for things like terminating lists of this enum diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h new file mode 100644 index 0000000000..1b48ed06f6 --- /dev/null +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -0,0 +1,145 @@ +#ifndef _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/asset/IMeshPipeline.h" +#include "nbl/asset/ICPURenderpass.h" +#include "nbl/asset/ICPUPipeline.h" + + +namespace nbl::asset +{ + +class ICPUMeshPipeline final : public ICPUPipeline> +{ + using pipeline_base_t = IMeshPipeline; + using base_t = ICPUPipeline; + + public: + + static core::smart_refctd_ptr create(ICPUPipelineLayout* layout, ICPURenderpass* renderpass = nullptr) + { + auto retval = new ICPUMeshPipeline(layout, renderpass); + return core::smart_refctd_ptr(retval,core::dont_grab); + } + + constexpr static inline auto AssetType = ET_MESH_PIPELINE; + inline E_TYPE getAssetType() const override { return AssetType; } + + inline const SCachedCreationParams& getCachedCreationParams() const + { + return pipeline_base_t::getCachedCreationParams(); + } + + inline SCachedCreationParams& getCachedCreationParams() + { + assert(isMutable()); + return m_params; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) const override final + { + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; + } + return {}; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) + { + return base_t::getSpecInfos(stage); + } + + SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) + { + if (!isMutable()) return nullptr; + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return &m_specInfos[0]; + case hlsl::ShaderStage::ESS_MESH: return &m_specInfos[1]; + case 
hlsl::ShaderStage::ESS_FRAGMENT: return &m_specInfos[2]; + } + return nullptr; + } + + const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const + { + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return &m_specInfos[stageIndex]; + return nullptr; + } + + inline bool valid() const override + { + if (!m_layout) return false; + if (!m_layout->valid())return false; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!m_renderpass || m_params.subpassIx >= m_renderpass->getSubpassCount()) return false; + + core::bitflag stagePresence = {}; + for (auto shader_i = 0u; shader_i < m_specInfos.size(); shader_i++) + { + const auto& info = m_specInfos[shader_i]; + if (info.shader) + stagePresence |= indexToStage(shader_i); + } + return hasRequiredStages(stagePresence); + } + + protected: + using base_t::base_t; + virtual ~ICPUMeshPipeline() override = default; + + std::array m_specInfos; + + private: + explicit ICPUMeshPipeline(ICPUPipelineLayout* layout, ICPURenderpass* renderpass) + : base_t(layout, {}, renderpass) + {} + + static inline int8_t stageToIndex(const hlsl::ShaderStage stage) + { + const auto stageIx = hlsl::findLSB(stage); + if (stageIx < 0 || stageIx >= MESH_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) + return -1; + return stageIx; + } + + static inline hlsl::ShaderStage indexToStage(const int8_t index) + { + switch (index) { + case 0: return hlsl::ShaderStage::ESS_TASK; + case 1: return hlsl::ShaderStage::ESS_MESH; + case 2: return hlsl::ShaderStage::ESS_FRAGMENT; + } + return hlsl::ShaderStage::ESS_UNKNOWN; + } + + inline core::smart_refctd_ptr clone_impl(core::smart_refctd_ptr&& layout, uint32_t depth) const override final + { + auto* newPipeline = new ICPUMeshPipeline(layout.get(), m_renderpass.get()); + newPipeline->m_params = m_params; + + for (auto specInfo_i = 0u; specInfo_i < 
m_specInfos.size(); specInfo_i++) + { + newPipeline->m_specInfos[specInfo_i] = m_specInfos[specInfo_i].clone(depth); + } + + return core::smart_refctd_ptr(newPipeline, core::dont_grab); + } + + inline void visitDependents_impl(std::function visit) const override + { + if (!visit(m_layout.get())) return; + if (!visit(m_renderpass.get())) return; + for (const auto& info : m_specInfos) + if (!visit(info.shader.get())) return; + } +}; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index ca8418bde7..89f79e4f86 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -37,6 +37,9 @@ struct DispatchIndirectCommand_t uint32_t num_groups_z; }; +// in vulkan this struct is distinct from DispatchIndirect, but has the same data - https://docs.vulkan.org/refpages/latest/refpages/source/VkDrawMeshTasksIndirectCommandEXT.html +using DrawMeshTasksIndirectCommand_t = DispatchIndirectCommand_t; + struct TraceRaysIndirectCommand_t { uint64_t raygenShaderRecordAddress; diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index bb6460754a..63552efa20 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -328,8 +328,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool copyAccelerationStructureFromMemory(const AccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo); //! 
state setup - bool bindComputePipeline(const IGPUComputePipeline* const pipeline); bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline); + bool bindComputePipeline(const IGPUComputePipeline* const pipeline); + bool bindMeshPipeline(const IGPUMeshPipeline* const pipeline); + bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline); bool bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -442,6 +444,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } bool dispatchIndirect(const asset::SBufferBinding& binding); + bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1); + inline bool drawMeshTasks(const hlsl::vector groupCount) { + return drawMeshTasks(groupCount.x, groupCount.y, groupCount.z); + } + bool drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride); + //! Begin/End RenderPasses struct SRenderpassBeginInfo { @@ -585,7 +593,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual const void* getNativeHandle() const = 0; inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } - const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; } + const IGPUPipelineBase* getBoundGraphicsPipeline() const { return m_boundRasterizationPipeline; } const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; } const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; } @@ -670,8 +678,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst) = 0; virtual bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* 
dst) = 0; - virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0; + virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; + virtual bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) = 0; virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0; virtual bool bindDescriptorSets_impl( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -715,6 +724,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool drawIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; virtual bool drawIndexedIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; + virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; + virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; @@ -750,7 +762,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_boundDescriptorSetsRecord.clear(); m_TLASTrackingOps.clear(); - m_boundGraphicsPipeline= nullptr; + m_boundRasterizationPipeline= nullptr; 
m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; m_haveRtPipelineStackSize = false; @@ -768,7 +780,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject deleteCommandList(); m_boundDescriptorSetsRecord.clear(); m_TLASTrackingOps.clear(); - m_boundGraphicsPipeline= nullptr; + m_boundRasterizationPipeline= nullptr; m_boundComputePipeline= nullptr; m_boundRayTracingPipeline= nullptr; m_haveRtPipelineStackSize = false; @@ -929,7 +941,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // operations as they'll be performed in order core::vector> m_TLASTrackingOps; - const IGPUGraphicsPipeline* m_boundGraphicsPipeline; + const IGPUPipelineBase* m_boundRasterizationPipeline; const IGPUComputePipeline* m_boundComputePipeline; const IGPURayTracingPipeline* m_boundRayTracingPipeline; diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 0424ad83bd..35442b341b 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -8,8 +8,9 @@ #include "nbl/video/IEvent.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/video/IGPUComputePipeline.h" #include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/video/IGPUComputePipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" #include "nbl/video/IGPUFramebuffer.h" #include "nbl/video/IQueryPool.h" @@ -125,7 +126,6 @@ class IGPUCommandPool : public IBackendObject class CBeginRenderPassCmd; class CPipelineBarrierCmd; class CBindDescriptorSetsCmd; - class CBindComputePipelineCmd; class CUpdateBufferCmd; class CResetQueryPoolCmd; class CWriteTimestampCmd; @@ -133,6 +133,9 @@ class IGPUCommandPool : public IBackendObject class CEndQueryCmd; class CCopyQueryPoolResultsCmd; class CBindGraphicsPipelineCmd; + class CBindComputePipelineCmd; + class CBindMeshPipelineCmd; + class CBindRayTracingPipelineCmd; class CPushConstantsCmd; class CBindVertexBuffersCmd; class CCopyBufferCmd; @@ 
-155,7 +158,6 @@ class IGPUCommandPool : public IBackendObject class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR class CTraceRaysCmd; class CTraceRaysIndirectCmd; - class CBindRayTracingPipelineCmd; protected: IGPUCommandPool(core::smart_refctd_ptr&& dev, const core::bitflag _flags, const uint8_t _familyIx) @@ -529,15 +531,6 @@ class IGPUCommandPool::CBindDescriptorSetsCmd final : public IFixedSizeCommand m_sets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT]; }; -class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand -{ - public: - CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} - - private: - core::smart_refctd_ptr m_pipeline; -}; - class IGPUCommandPool::CUpdateBufferCmd final : public IFixedSizeCommand { public: @@ -604,6 +597,33 @@ class IGPUCommandPool::CBindGraphicsPipelineCmd final : public IFixedSizeCommand core::smart_refctd_ptr m_pipeline; }; +class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand +{ + public: + CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CBindMeshPipelineCmd final : public IFixedSizeCommand +{ +public: + CBindMeshPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand +{ + public: + CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + + private: + core::smart_refctd_ptr m_pipeline; +}; + class IGPUCommandPool::CPushConstantsCmd final : public IFixedSizeCommand { public: @@ -870,14 +890,6 @@ class IGPUCommandPool::CTraceRaysIndirectCmd final : public IFixedSizeCommand m_bindingBuffer; }; -class 
IGPUCommandPool::CBindRayTracingPipelineCmd final : public IFixedSizeCommand -{ - public: - CBindRayTracingPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} - - private: - core::smart_refctd_ptr m_pipeline; -}; NBL_ENUM_ADD_BITWISE_OPERATORS(IGPUCommandPool::CREATE_FLAGS) } diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h new file mode 100644 index 0000000000..7878704978 --- /dev/null +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -0,0 +1,165 @@ +#ifndef _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IMeshPipeline.h" + +#include "nbl/video/IGPUPipelineLayout.h" +#include "nbl/video/IGPURenderpass.h" +#include "nbl/video/IGPUPipeline.h" + +//related spec + +//i feel like this MIGHT get stuffed into graphicspipeline but idk + +/* +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264 +** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064 +* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322 +* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb 
execution mode +*** Xfb is the SPIR-V execution mode that enables transform feedback + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187 +* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188 +* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +*** my understanding is that Nabla strictly controls its extensions, so this shouldn't be an issue + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE +*** this one seems the most relevant + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT +
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720 +* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and +* VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + + +* theres 1 or 2 more about pipeline libraries, but im not going to worry about that +*/ + +namespace nbl::video +{ + + class IGPUMeshPipeline : public IGPUPipeline> + { + using pipeline_t = asset::IMeshPipeline; + + public: + struct SCreationParams final : public SPipelineCreationParams + { + public: + #define base_flag(F) static_cast(pipeline_t::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + VIEW_INDEX_FROM_DEVICE_INDEX = 1<<3, + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + }; + #undef base_flag + + inline SSpecializationValidationResult valid() const + { + if (!layout) + return {}; + SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount()) + return {}; + + // TODO: check rasterization samples, etc. 
+ //rp->getCreationParameters().subpasses[i] + + core::bitflag stagePresence = {}; + + auto processSpecInfo = [&](const SShaderSpecInfo& specInfo, hlsl::ShaderStage stage) + { + if (!specInfo.shader) return true; + if (!specInfo.accumulateSpecializationValidationResult(&retval)) return false; + stagePresence |= stage; + return true; + }; + if (!processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK)) return {}; + if (!processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH)) return {}; + if (!processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT)) return {}; + + if (!hasRequiredStages(stagePresence)) + return {}; + + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? + + return retval; + } + + inline core::bitflag getRequiredSubgroupStages() const + { + + core::bitflag stages = {}; + auto processSpecInfo = [&](const SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader && spec.requiredSubgroupSize >= SUBGROUP_SIZE::REQUIRE_4) { + stages |= stage; + } + }; + processSpecInfo(taskShader, hlsl::ESS_TASK); + processSpecInfo(meshShader, hlsl::ESS_MESH); + processSpecInfo(fragmentShader, hlsl::ESS_FRAGMENT); + return stages; + } + + inline core::bitflag& getFlags() { return flags; } + + inline core::bitflag getFlags() const { return flags; } + + const IGPUPipelineLayout* layout = nullptr; + SShaderSpecInfo taskShader; + SShaderSpecInfo meshShader; + SShaderSpecInfo fragmentShader; + SCachedCreationParams cached = {}; + renderpass_t* renderpass = nullptr; + + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; + + inline uint32_t getShaderCount() const + { + uint32_t count = 0; //count = 2 and only check task shader?? 
+ count += (taskShader.shader != nullptr); + count += (meshShader.shader != nullptr); + count += (fragmentShader.shader != nullptr); + return count; + } + }; + + inline core::bitflag getCreationFlags() const {return m_flags;} + + // Vulkan: const VkPipeline* + virtual const void* getNativeHandle() const = 0; + + protected: + // not explicit? + IGPUMeshPipeline(const SCreationParams& params) : + IGPUPipeline(core::smart_refctd_ptr(params.layout->getOriginDevice()), params.layout, params.cached, params.renderpass), m_flags(params.flags) + {} + virtual ~IGPUMeshPipeline() override = default; + + const core::bitflag m_flags; + }; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 180342e2d4..19d44b2486 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -1020,17 +1020,29 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return createPipelineCache(initialData,notThreadsafe); } - bool createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output); - bool createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output ); - bool createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output); + bool createComputePipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); + + bool createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); + + bool createRayTracingPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); // queries inline core::smart_refctd_ptr createQueryPool(const IQueryPool::SCreationParams& params) @@ -1276,16 
+1288,22 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe } return retval; } + virtual void createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) = 0; virtual void createComputePipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; - virtual void createGraphicsPipelines_impl( + virtual void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output, + const std::span params, + core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; virtual void createRayTracingPipelines_impl( diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index c4a6c25ca5..2ed08108b5 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -9,10 +9,6 @@ #include "nbl/video/IGPUBufferView.h" #include "nbl/asset/ICPUDescriptorSet.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/asset/ICPUComputePipeline.h" -#include "nbl/video/IGPUComputePipeline.h" -#include "nbl/asset/ICPUGraphicsPipeline.h" -#include "nbl/video/IGPUGraphicsPipeline.h" #include "nbl/asset/ICPUSampler.h" #include "nbl/video/IGPUSampler.h" #include "nbl/asset/ICPUImageView.h" @@ -21,6 +17,13 @@ #include "nbl/video/IGPUAccelerationStructure.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/video/IGPUPolygonGeometry.h" + +#include "nbl/asset/ICPUGraphicsPipeline.h" +#include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/asset/ICPUComputePipeline.h" +#include "nbl/video/IGPUComputePipeline.h" +#include "nbl/asset/ICPUMeshPipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" #include "nbl/asset/ICPURayTracingPipeline.h" #include 
"nbl/video/IGPURayTracingPipeline.h" @@ -96,19 +99,6 @@ struct asset_traits using lookup_t = const video_t*; }; -template<> -struct asset_traits -{ - // the asset type - using asset_t = asset::ICPUComputePipeline; - // Pipeline Layout references Descriptor Set Layouts - constexpr static inline bool HasChildren = true; - // the video type - using video_t = IGPUComputePipeline; - // lookup type - using lookup_t = const video_t*; -}; - template<> struct asset_traits @@ -123,19 +113,6 @@ struct asset_traits using lookup_t = const video_t*; }; -template<> -struct asset_traits -{ - // the asset type - using asset_t = asset::ICPUGraphicsPipeline; - // we reference a pipeline layout and a renderpass - constexpr static inline bool HasChildren = true; - // the video type - using video_t = IGPUGraphicsPipeline; - // lookup type - using lookup_t = const video_t*; -}; - template<> struct asset_traits @@ -246,6 +223,45 @@ struct asset_traits }; +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUGraphicsPipeline; + // we reference a pipeline layout and a renderpass + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUGraphicsPipeline; + // lookup type + using lookup_t = const video_t*; +}; + +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUComputePipeline; + // Pipeline Layout references Descriptor Set Layouts + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUComputePipeline; + // lookup type + using lookup_t = const video_t*; +}; + +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPUMeshPipeline; + // Pipeline Layout references Descriptor Set Layouts + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPUMeshPipeline; + // lookup type + using lookup_t = const video_t*; +}; + template<> struct asset_traits { diff --git a/src/nbl/CMakeLists.txt 
b/src/nbl/CMakeLists.txt index 512633536f..359b2cc024 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -266,7 +266,6 @@ set(NBL_VIDEO_SOURCES video/CVulkanDescriptorSetLayout.cpp video/CVulkanPipelineLayout.cpp video/CVulkanPipelineCache.cpp - video/CVulkanComputePipeline.cpp video/CVulkanDescriptorPool.cpp video/CVulkanDescriptorSet.cpp video/CVulkanMemoryAllocation.cpp @@ -279,6 +278,8 @@ set(NBL_VIDEO_SOURCES video/CVulkanConnection.cpp video/CVulkanPhysicalDevice.cpp video/CVulkanGraphicsPipeline.cpp + video/CVulkanComputePipeline.cpp + video/CVulkanMeshPipeline.cpp video/CVulkanRayTracingPipeline.cpp video/CVulkanEvent.cpp video/CSurfaceVulkan.cpp diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index a55c3a1e7b..f33966588e 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -406,15 +406,21 @@ bool CVulkanCommandBuffer::copyAccelerationStructureFromMemory_impl(const asset: return true; } +bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) +{ + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); + return true; +} + bool CVulkanCommandBuffer::bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) { getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, static_cast(pipeline)->getInternalObject()); return true; } -bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) +bool CVulkanCommandBuffer::bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) { - getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); return true; } @@ -639,6 +645,18 @@ bool 
CVulkanCommandBuffer::dispatchIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) +{ + getFunctionTable().vkCmdDrawMeshTasksIndirectEXT(m_cmdbuf, static_cast(binding.buffer.get())->getInternalObject(), binding.offset, drawCount, stride); + return true; +} + bool CVulkanCommandBuffer::beginRenderPass_impl(const SRenderpassBeginInfo& info, const SUBPASS_CONTENTS contents) { diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 9383585b23..ba3925ffe2 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -181,8 +181,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst); bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* dst); - bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; + bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; + bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) override; bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; @@ -209,6 +210,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool dispatch_impl(const uint32_t 
groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; bool dispatchIndirect_impl(const asset::SBufferBinding& binding) override; + bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; + bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; + bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) override; bool nextSubpass_impl(const SUBPASS_CONTENTS contents) override; bool endRenderPass_impl() override; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 5390b4c3fa..34a24d30d7 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1121,7 +1121,7 @@ VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( if (requireFullSubgroups) { - assert(stage==hlsl::ShaderStage::ESS_COMPUTE/*TODO: Or Mesh Or Task*/); + assert(stage == hlsl::ShaderStage::ESS_COMPUTE || stage == hlsl::ShaderStage::ESS_MESH || stage == hlsl::ShaderStage::ESS_TASK); retval.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; } } @@ -1176,8 +1176,8 @@ void CVulkanLogicalDevice::createComputePipelines_impl( for (const auto& info : createInfos) { initPipelineCreateInfo(outCreateInfo,info); - const auto& spec = info.shader; - outCreateInfo->stage = getVkShaderStageCreateInfoFrom(spec, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); + + outCreateInfo->stage = getVkShaderStageCreateInfoFrom(info.shader, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); outCreateInfo++; } auto vk_pipelines = reinterpret_cast(output); @@ -1202,26 +1202,136 @@ void 
CVulkanLogicalDevice::createComputePipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } -void CVulkanLogicalDevice::createGraphicsPipelines_impl( - IGPUPipelineCache* const pipelineCache, - const std::span createInfos, - core::smart_refctd_ptr* const output, - const SSpecializationValidationResult& validation -) -{ - auto getVkStencilOpStateFrom = [](const asset::SStencilOpParams& params)->VkStencilOpState - { - return { - .failOp = static_cast(params.failOp), - .passOp = static_cast(params.passOp), - .depthFailOp = static_cast(params.depthFailOp), - .compareOp = static_cast(params.compareOp) - }; +void PopulateViewport(VkPipelineViewportStateCreateInfo& outViewport, nbl::asset::SRasterizationParams const& raster) { + outViewport.viewportCount = raster.viewportCount; + // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used + outViewport.scissorCount = raster.viewportCount; +} + + +void PopulateRaster(VkPipelineRasterizationStateCreateInfo& outRaster, nbl::asset::SRasterizationParams const& raster) { + outRaster.depthClampEnable = raster.depthClampEnable; + outRaster.rasterizerDiscardEnable = raster.rasterizerDiscard; + outRaster.polygonMode = static_cast(raster.polygonMode); + outRaster.cullMode = static_cast(raster.faceCullingMode); + outRaster.frontFace = raster.frontFaceIsCCW ? 
VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; + outRaster.depthBiasEnable = raster.depthBiasEnable; +} + +void PopulateMultisample(VkPipelineMultisampleStateCreateInfo& outMultisample, nbl::asset::SRasterizationParams const& raster) { + outMultisample.rasterizationSamples = static_cast(0x1 << raster.samplesLog2); + if (raster.minSampleShadingUnorm > 0) { + outMultisample.sampleShadingEnable = true; + outMultisample.minSampleShading = float(raster.minSampleShadingUnorm) / 255.f; + } + else { + outMultisample.sampleShadingEnable = false; + outMultisample.minSampleShading = 0.f; + } + outMultisample.pSampleMask = raster.sampleMask; + outMultisample.alphaToCoverageEnable = raster.alphaToCoverageEnable; + outMultisample.alphaToOneEnable = raster.alphaToOneEnable; +} +VkStencilOpState getVkStencilOpStateFrom(const asset::SStencilOpParams& params) { + return { + .failOp = static_cast(params.failOp), + .passOp = static_cast(params.passOp), + .depthFailOp = static_cast(params.depthFailOp), + .compareOp = static_cast(params.compareOp) }; +} - const auto& features = getEnabledFeatures(); +void PopulateDepthStencil(VkPipelineDepthStencilStateCreateInfo& outDepthStencil, nbl::asset::SRasterizationParams const& raster) { + outDepthStencil.depthTestEnable = raster.depthTestEnable(); + outDepthStencil.depthWriteEnable = raster.depthWriteEnable; + outDepthStencil.depthCompareOp = static_cast(raster.depthCompareOp); + outDepthStencil.depthBoundsTestEnable = raster.depthBoundsTestEnable; + outDepthStencil.stencilTestEnable = raster.stencilTestEnable(); + outDepthStencil.front = getVkStencilOpStateFrom(raster.frontStencilOps); + outDepthStencil.back = getVkStencilOpStateFrom(raster.backStencilOps); +} + +void PopulateColorBlend( + VkPipelineColorBlendStateCreateInfo& outColorBlend, + VkPipelineColorBlendAttachmentState*& outColorBlendAttachmentState, + nbl::asset::SBlendParams const& blend, + nbl::asset::IRenderpass::SCreationParams::SSubpassDescription const& subpass +) 
{ + //outColorBlend->flags no attachment order access yet + outColorBlend.logicOpEnable = blend.logicOp != asset::ELO_NO_OP; + outColorBlend.logicOp = getVkLogicOpFromLogicOp(blend.logicOp); + outColorBlend.pAttachments = outColorBlendAttachmentState; + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) { + if (subpass.colorAttachments[i].render.used()) { + const auto& params = blend.blendParams[i]; + outColorBlendAttachmentState->blendEnable = params.blendEnabled(); + outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); + outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); + outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); + outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); + outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); + outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); + outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); + outColorBlendAttachmentState++; + //^that pointer iterator is how we ensure the attachments or consecutive + } + } + outColorBlend.attachmentCount = std::distance(outColorBlend.pAttachments, outColorBlendAttachmentState); +} + +template +void PopulateMeshGraphicsCommonData( + const std::span createInfos, + core::vector& vk_createInfos, + + core::vector& vk_viewportStates, + core::vector& vk_rasterizationStates, + core::vector& vk_multisampleStates, + core::vector& vk_depthStencilStates, + core::vector& vk_colorBlendStates, + core::vector& vk_colorBlendAttachmentStates, + + core::vector& vk_dynamicStates, + const VkPipelineDynamicStateCreateInfo& 
vk_dynamicStateCreateInfo +) { + //the main concern is lifetime, so don't want to construct, move, or copy anything in here + + auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); //the pointer iterator is used - core::vector vk_dynamicStates = { + + for (uint32_t i = 0; i < createInfos.size(); i++) { //whats the maximum number of pipelines that can be created at once? uint32_t to be safe + auto& info = createInfos[i]; + const auto& blend = info.cached.blend; + const auto& raster = info.cached.rasterization; + const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; + + initPipelineCreateInfo(&vk_createInfos[i], info); + + PopulateViewport(vk_viewportStates[i], raster); + PopulateRaster(vk_rasterizationStates[i], raster); + PopulateMultisample(vk_multisampleStates[i], raster); + PopulateDepthStencil(vk_depthStencilStates[i], raster); + PopulateColorBlend(vk_colorBlendStates[i], outColorBlendAttachmentState, blend, subpass); + //PopulateDynamicState(dynState, ?) + + + vk_createInfos[i].pViewportState = &vk_viewportStates[i]; + vk_createInfos[i].pRasterizationState = &vk_rasterizationStates[i]; + vk_createInfos[i].pMultisampleState = &vk_multisampleStates[i]; + vk_createInfos[i].pDepthStencilState = &vk_depthStencilStates[i]; + vk_createInfos[i].pColorBlendState = &vk_colorBlendStates[i]; + vk_createInfos[i].pDynamicState = &vk_dynamicStateCreateInfo; + vk_createInfos[i].renderPass = static_cast(info.renderpass)->getInternalObject(); + vk_createInfos[i].subpass = info.cached.subpassIx; + //handle + //index + //layout? 
+ // ^ handled in initPipelineCreateInfo + } +} + +core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures const& features) { + core::vector ret = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, @@ -1231,19 +1341,164 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE }; - if (features.depthBounds) - vk_dynamicStates.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + if (features.depthBounds) { + ret.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + } // TODO: VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT - - const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + + return ret; +} + +//maximum cleanliness,i tried it and im not a big fan +//struct CommonPipelineStruct { +// VkPipelineRasterizationStateCreateInfo vk_rasterizationStates{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineMultisampleStateCreateInfo vk_multisampleStates{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineDepthStencilStateCreateInfo vk_depthStencilStates{ VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineColorBlendStateCreateInfo vk_colorBlendStates{ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }; +// core::vector vk_colorBlendAttachmentStates{ IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments }; +//}; + + +void CVulkanLogicalDevice::createMeshPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) { + const auto& features = getEnabledFeatures(); + + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; + + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0u, .dynamicStateCount = static_cast(vk_dynamicStates.size()), .pDynamicStates = vk_dynamicStates.data() }; + core::vector vk_viewportStates(createInfos.size(), { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + }); - const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + //not used in mesh pipelines + for (auto& outCreateInfo : vk_createInfos) { + outCreateInfo.pVertexInputState = nullptr; + outCreateInfo.pInputAssemblyState = nullptr; + outCreateInfo.pTessellationState = nullptr; + } + auto outCreateInfo = vk_createInfos.data(); + + const auto maxShaderStages = createInfos.size() * IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr }); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) + { + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + 
outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.taskShader, hlsl::ShaderStage::ESS_TASK); + processSpecShader(info.meshShader, hlsl::ShaderStage::ESS_MESH); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + outCreateInfo++; + } + + auto vk_pipelines = reinterpret_cast(output); + std::stringstream debugNameBuilder; + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) + { + for (size_t i = 0ull; i < createInfos.size(); ++i) + { + const auto& createInfo = createInfos[i]; + const VkPipeline vk_pipeline = vk_pipelines[i]; + // break the lifetime cause of the aliasing + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); + debugNameBuilder.str(""); + auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; + buildDebugName(createInfo.taskShader, hlsl::ESS_TASK); + buildDebugName(createInfo.meshShader, hlsl::ESS_MESH); + buildDebugName(createInfo.fragmentShader, hlsl::ESS_FRAGMENT); + output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); + } + } + else + std::fill_n(output, vk_createInfos.size(), nullptr); +} + +void CVulkanLogicalDevice::createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) +{ + const auto& features = getEnabledFeatures(); + + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; // Interesting things to put in pNext: // - AttachmentSampleCountInfoAMD // - Graphics Pipeline Library styff @@ -1252,24 +1507,26 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( // - Discard Rectangle State // - Fragment Shading Rate State Creation Info // - Piepline Robustness - core::vector vk_createInfos(createInfos.size(),{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr}); - const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; - core::vector vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); - core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); - core::vector entryPoints(maxShaderStages); - core::vector vk_requiredSubgroupSize(maxShaderStages,{ - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr - }); - core::vector vk_specializationInfos(maxShaderStages,{0,nullptr,0,nullptr}); - core::vector vk_specializationMapEntry(validation.count); - core::vector specializationData(validation.dataSize); - core::vector vk_vertexInput(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_inputBinding(createInfos.size()*asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); - core::vector vk_inputAttribute(createInfos.size()*asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); - core::vector vk_inputAssembly(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_tessellation(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_viewportStates(createInfos.size(),{ + //maximum cleanliness, I create a struct that holds this for mesh and graphics? 
+ core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = static_cast(vk_dynamicStates.size()), + .pDynamicStates = vk_dynamicStates.data() + }; + core::vector vk_viewportStates(createInfos.size(), { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway .flags = 0, // must be 0 @@ -1277,68 +1534,54 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( .pViewports = nullptr, .scissorCount = 0, .pScissors = nullptr, - }); - core::vector vk_rasterizationStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_multisampleStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_depthStencilStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0}); - core::vector 
vk_colorBlendStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_colorBlendAttachmentStates(createInfos.size()*IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + }); + + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + + core::vector vk_inputBinding(createInfos.size() * asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); + core::vector vk_inputAttribute(createInfos.size() * asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); + core::vector vk_inputAssembly(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); auto outCreateInfo = vk_createInfos.data(); - auto outShaderStage = vk_shaderStage.data(); - auto outEntryPoints = entryPoints.data(); - auto outShaderModule = vk_shaderModule.data(); - auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); - auto outSpecInfo = vk_specializationInfos.data(); - auto outSpecMapEntry = vk_specializationMapEntry.data(); - auto outSpecData = specializationData.data(); auto outVertexInput = vk_vertexInput.data(); auto outInputBinding = vk_inputBinding.data(); auto outInputAttribute = vk_inputAttribute.data(); - auto outInputAssembly = vk_inputAssembly.data(); auto outTessellation = vk_tessellation.data(); - auto outViewport = vk_viewportStates.data(); - auto outRaster = vk_rasterizationStates.data(); - auto outMultisample = vk_multisampleStates.data(); - auto outDepthStencil = vk_depthStencilStates.data(); - auto 
outColorBlend = vk_colorBlendStates.data(); - auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); + auto outInputAssembly = vk_inputAssembly.data(); + //ill acknowledge this additional looping is a little ugly + //input and tess for (const auto& info : createInfos) { - initPipelineCreateInfo(outCreateInfo,info); - outCreateInfo->pStages = outShaderStage; - auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) { - if (spec.shader) - { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); - } - }; - processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); - processSpecShader(info.tesselationControlShader, hlsl::ShaderStage::ESS_TESSELLATION_CONTROL); - processSpecShader(info.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); - processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); - processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - - // when dealing with mesh shaders, the vertex input and assembly state will be null - { - { - const auto& vertexInputParams = info.cached.vertexInput; - outVertexInput->pVertexBindingDescriptions = outInputBinding; - for (auto b=0u; bpVertexBindingDescriptions = outInputBinding; + for (auto b = 0u; b < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; b++) + if (vertexInputParams.enabledBindingFlags & (1 << b)) { outInputBinding->binding = b; outInputBinding->stride = vertexInputParams.bindings[b].stride; outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); outInputBinding++; } - outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions,outInputBinding); - 
outVertexInput->pVertexAttributeDescriptions = outInputAttribute; - for (auto l=0u; lvertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions, outInputBinding); + outVertexInput->pVertexAttributeDescriptions = outInputAttribute; + for (auto l = 0u; l < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; l++) + if (vertexInputParams.enabledAttribFlags & (1 << l)) { outInputAttribute->location = l; outInputAttribute->binding = vertexInputParams.attributes[l].binding; @@ -1346,16 +1589,15 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; outInputAttribute++; } - outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions,outInputAttribute); - } - outCreateInfo->pVertexInputState = outVertexInput++; - { - const auto& primAssParams = info.cached.primitiveAssembly; - outInputAssembly->topology = static_cast(primAssParams.primitiveType); - outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; - } - outCreateInfo->pInputAssemblyState = outInputAssembly++; + outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions, outInputAttribute); } + outCreateInfo->pVertexInputState = outVertexInput++; + { + const auto& primAssParams = info.cached.primitiveAssembly; + outInputAssembly->topology = static_cast(primAssParams.primitiveType); + outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; + } + outCreateInfo->pInputAssemblyState = outInputAssembly++; if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) { @@ -1363,96 +1605,76 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outCreateInfo->pTessellationState = outTessellation++; } - const auto& raster = info.cached.rasterization; - { - outViewport->viewportCount = raster.viewportCount; - // must be identical to viewport 
count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used - outViewport->scissorCount = raster.viewportCount; - outCreateInfo->pViewportState = outViewport++; - } - { - outRaster->depthClampEnable = raster.depthClampEnable; - outRaster->rasterizerDiscardEnable = raster.rasterizerDiscard; - outRaster->polygonMode = static_cast(raster.polygonMode); - outRaster->cullMode = static_cast(raster.faceCullingMode); - outRaster->frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE:VK_FRONT_FACE_CLOCKWISE; - outRaster->depthBiasEnable = raster.depthBiasEnable; - outCreateInfo->pRasterizationState = outRaster++; - } - { - outMultisample->rasterizationSamples = static_cast(0x1<0) - { - outMultisample->sampleShadingEnable = true; - outMultisample->minSampleShading = float(raster.minSampleShadingUnorm)/255.f; - } - else - { - outMultisample->sampleShadingEnable = false; - outMultisample->minSampleShading = 0.f; - } - outMultisample->pSampleMask = raster.sampleMask; - outMultisample->alphaToCoverageEnable = raster.alphaToCoverageEnable; - outMultisample->alphaToOneEnable = raster.alphaToOneEnable; - outCreateInfo->pMultisampleState = outMultisample++; - } - { - //outDepthStencil->flags no attachment order access yet - outDepthStencil->depthTestEnable = raster.depthTestEnable(); - outDepthStencil->depthWriteEnable = raster.depthWriteEnable; - outDepthStencil->depthCompareOp = static_cast(raster.depthCompareOp); - outDepthStencil->depthBoundsTestEnable = raster.depthBoundsTestEnable; - outDepthStencil->stencilTestEnable = raster.stencilTestEnable(); - outDepthStencil->front = getVkStencilOpStateFrom(raster.frontStencilOps); - outDepthStencil->back = getVkStencilOpStateFrom(raster.backStencilOps); - outCreateInfo->pDepthStencilState = outDepthStencil++; - } - { - const auto& blend = info.cached.blend; - const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; - 
//outColorBlend->flags no attachment order access yet - outColorBlend->logicOpEnable = blend.logicOp!=asset::ELO_NO_OP; - outColorBlend->logicOp = getVkLogicOpFromLogicOp(blend.logicOp); - outColorBlend->pAttachments = outColorBlendAttachmentState; - for (auto i=0; i vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr + }); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + + outCreateInfo = vk_createInfos.data(); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) { - const auto& params = blend.blendParams[i]; - outColorBlendAttachmentState->blendEnable = params.blendEnabled(); - outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); - outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); - outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); - outColorBlendAttachmentState->srcAlphaBlendFactor = 
getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); - outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); - outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); - outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); - outColorBlendAttachmentState++; - } - outColorBlend->attachmentCount = std::distance(outColorBlend->pAttachments,outColorBlendAttachmentState); - outCreateInfo->pColorBlendState = outColorBlend++; - } - outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; - outCreateInfo->renderPass = static_cast(info.renderpass)->getInternalObject(); - outCreateInfo->subpass = info.cached.subpassIx; + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); + processSpecShader(info.tesselationControlShader, hlsl::ShaderStage::ESS_TESSELLATION_CONTROL); + processSpecShader(info.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); + processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + outCreateInfo++; } + auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; - if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) { - for (size_t i=0ull; 
i(createInfos[i],vk_pipeline); + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); debugNameBuilder.str(""); auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) - { - if (spec.shader != nullptr) - debugNameBuilder <getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; - }; + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; buildDebugName(createInfo.vertexShader, hlsl::ESS_VERTEX); buildDebugName(createInfo.tesselationControlShader, hlsl::ESS_TESSELLATION_CONTROL); buildDebugName(createInfo.tesselationEvaluationShader, hlsl::ESS_TESSELLATION_EVALUATION); @@ -1462,7 +1684,7 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( } } else - std::fill_n(output,vk_createInfos.size(),nullptr); + std::fill_n(output, vk_createInfos.size(), nullptr); } void CVulkanLogicalDevice::createRayTracingPipelines_impl( diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 4cc633ec55..ae0da50eaf 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -20,7 +20,6 @@ #include "nbl/video/CVulkanSampler.h" #include "nbl/video/CVulkanPipelineLayout.h" #include "nbl/video/CVulkanPipelineCache.h" -#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanDescriptorPool.h" #include "nbl/video/CVulkanDescriptorSet.h" #include "nbl/video/CVulkanMemoryAllocation.h" @@ -29,7 +28,10 @@ #include "nbl/video/CVulkanImage.h" #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" + #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanComputePipeline.h" +#include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" namespace nbl::video @@ -281,16 +283,22 @@ class 
CVulkanLogicalDevice final : public ILogicalDevice core::smart_refctd_ptr createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) override; // pipelines + void createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) override; void createComputePipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; - void createGraphicsPipelines_impl( + void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output, + const std::span params, + core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; diff --git a/src/nbl/video/CVulkanMeshPipeline.cpp b/src/nbl/video/CVulkanMeshPipeline.cpp new file mode 100644 index 0000000000..8fa3cc63eb --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.cpp @@ -0,0 +1,27 @@ +#include "nbl/video/CVulkanMeshPipeline.h" + +#include "nbl/video/CVulkanLogicalDevice.h" + +namespace nbl::video +{ + + CVulkanMeshPipeline::~CVulkanMeshPipeline() + { + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); + } + void CVulkanMeshPipeline::setObjectDebugName(const char* label) const + { + IBackendObject::setObjectDebugName(label); + + if (vkSetDebugUtilsObjectNameEXT == 0) return; + + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDebugUtilsObjectNameInfoEXT nameInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr }; + nameInfo.objectType = VK_OBJECT_TYPE_PIPELINE; + nameInfo.objectHandle = reinterpret_cast(getInternalObject()); + nameInfo.pObjectName = 
getObjectDebugName(); + vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); + } +} \ No newline at end of file diff --git a/src/nbl/video/CVulkanMeshPipeline.h b/src/nbl/video/CVulkanMeshPipeline.h new file mode 100644 index 0000000000..39b11695a7 --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.h @@ -0,0 +1,33 @@ +#ifndef _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/video/IGPUMeshPipeline.h" + +#include + +namespace nbl::video +{ + + //potentially collapse this so Mesh just uses CVulkanGraphicsPipeline + //if thats done, BindMesh can go away +class CVulkanMeshPipeline final : public IGPUMeshPipeline +{ + public: + CVulkanMeshPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : + IGPUMeshPipeline(params), m_vkPipeline(vk_pipeline) {} + + inline const void* getNativeHandle() const override {return &m_vkPipeline;} + + inline VkPipeline getInternalObject() const {return m_vkPipeline;} + + void setObjectDebugName(const char* label) const override; //exists in compute but not in graphics + private: + ~CVulkanMeshPipeline(); + + const VkPipeline m_vkPipeline; +}; + +} + +#endif \ No newline at end of file diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index da86d7c9d9..163bd93026 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -732,6 +732,11 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperativeMatrixFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR }; VkPhysicalDeviceMaintenance5FeaturesKHR maintenance5Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_FEATURES_KHR }; VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT graphicsPipelineLibraryFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT }; + 
VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; + + if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + addToPNextChain(&meshShaderFeatures); + } if (isExtensionSupported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME)) addToPNextChain(&conditionalRenderingFeatures); @@ -818,6 +823,44 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart features.geometryShader = deviceFeatures.features.geometryShader; features.tessellationShader = deviceFeatures.features.tessellationShader; + //check if features are existant first + //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features + //depends on the less obvious properties + if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + features.meshShader = meshShaderFeatures.meshShader; + features.taskShader = meshShaderFeatures.taskShader; + //TODO + //VkBool32 multiviewMeshShader; + //VkBool32 primitiveFragmentShadingRateMeshShader; + //VkBool32 meshShaderQueries; + + //VkPhysicalDeviceMeshShaderPropertiesEXT + //#define LIMIT_INIT_MESH(limitMemberName) properties.limits.limitMemberName = meshShaderProperties.limitMemberName + //LIMIT_INIT_MESH(maxTaskWorkGroupTotalCount); + //LIMIT_INIT_MESH(maxTaskWorkGroupInvocations); + //LIMIT_INIT_MESH(maxTaskPayloadSize); + //LIMIT_INIT_MESH(maxTaskSharedMemorySize); + //LIMIT_INIT_MESH(maxTaskPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshWorkGroupInvocations); + //LIMIT_INIT_MESH(maxMeshSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputComponents); + //LIMIT_INIT_MESH(maxMeshOutputVertices); + //LIMIT_INIT_MESH(maxMeshOutputPrimitives); + //LIMIT_INIT_MESH(maxMeshOutputLayers); + //LIMIT_INIT_MESH(maxMeshMultiviewViewCount); + //LIMIT_INIT_MESH(maxMeshOutputPerVertexGranularity); + //LIMIT_INIT_MESH(maxMeshOutputPerPrimitiveGranularity); + + 
//for(uint8_t i = 0; i < 3; i++){ + // LIMIT_INIT_MESH(maxTaskWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxTaskWorkGroupSize[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupSize[i]); + //} + //#undef LIMIT_INIT_MESH + } if (!deviceFeatures.features.sampleRateShading || !deviceFeatures.features.dualSrcBlend) RETURN_NULL_PHYSICAL_DEVICE; properties.limits.logicOp = deviceFeatures.features.logicOp; @@ -1491,6 +1534,9 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic enableExtensionIfAvailable(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT, nullptr}; + REQUIRE_EXTENSION_IF(enabledFeatures.meshShader, VK_EXT_MESH_SHADER_EXTENSION_NAME, &meshShaderFeatures); + VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR,nullptr }; VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR rayTracingMaintenance1Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR,nullptr }; REQUIRE_EXTENSION_IF(enabledFeatures.accelerationStructure,VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,&accelerationStructureFeatures); // feature dependency taken care of @@ -1821,6 +1867,12 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic //shaderSMBuiltinsFeaturesNV [LIMIT SO ENABLE EVERYTHING BY DEFAULT] representativeFragmentTestFeatures.representativeFragmentTest = enabledFeatures.representativeFragmentTest; + + meshShaderFeatures.taskShader = enabledFeatures.taskShader; + meshShaderFeatures.meshShader = enabledFeatures.meshShader; + meshShaderFeatures.primitiveFragmentShadingRateMeshShader = VK_FALSE;//needs to be explicitly set? 
+ meshShaderFeatures.meshShaderQueries = VK_FALSE; + meshShaderFeatures.multiviewMeshShader = VK_FALSE; //shaderClockFeatures [LIMIT SO ENABLE EVERYTHING BY DEFAULT] diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1f619666ab..1807829b1b 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -963,6 +963,32 @@ template NBL_API2 bool IGPUCommandBuffer::copyAccelerationStructureFromMemory(const IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo&); +bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +{ + // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, + // we cannot check renderpass-pipeline compatibility here. + // And checking before every drawcall would be performance suicide. + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT)) + return false; + + if (!pipeline || !this->isCompatibleDevicewise(pipeline)) + { + NBL_LOG_ERROR("incompatible pipeline device!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_boundRasterizationPipeline = reinterpret_cast(pipeline); + + m_noCommands = false; + return bindGraphicsPipeline_impl(pipeline); +} + bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pipeline) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) @@ -988,7 +1014,7 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return true; } -bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +bool IGPUCommandBuffer::bindMeshPipeline(const IGPUMeshPipeline* const pipeline) { // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, // we cannot check renderpass-pipeline compatibility here. 
@@ -1002,16 +1028,16 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) { NBL_LOG_ERROR("out of host memory!"); return false; } - m_boundGraphicsPipeline = pipeline; + m_boundRasterizationPipeline = reinterpret_cast(pipeline); m_noCommands = false; - return bindGraphicsPipeline_impl(pipeline); + return bindMeshPipeline_impl(pipeline); } bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) @@ -1462,6 +1488,59 @@ bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBindinggetPhysicalDevice()->getLimits(); + if (groupCountX > limits.maxMeshWorkGroupCount[0] || groupCountY > limits.maxMeshWorkGroupCount[1] || groupCountZ > limits.maxMeshWorkGroupCount[2]) + { + NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ, limits.maxMeshWorkGroupCount[0], limits.maxMeshWorkGroupCount[1], limits.maxMeshWorkGroupCount[2]); + return false; + } + + m_noCommands = false; + return drawMeshTasks_impl(groupCountX, groupCountY, groupCountZ); +} + +bool IGPUCommandBuffer::drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride) +{ + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::INSIDE)) + return false; + if (invalidBufferBinding(binding,4u/*TODO: is it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)){ + return false; + } + + if (drawCount) { + if (drawCount==1u) + stride = sizeof(hlsl::DrawMeshTasksIndirectCommand_t); + if (stride&0x3u || stride getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount) { + NBL_LOG_ERROR("draw count (%d) exceeds maximum allowed amount (%d)!", drawCount, 
getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount); + return false; + } + if (invalidBufferRange({ binding.offset,stride * (drawCount - 1u) + sizeof(hlsl::DrawMeshTasksIndirectCommand_t),binding.buffer }, alignof(uint32_t), IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) + return false; + } // i get the feeling the vk command shouldnt be called if drawCount is 0, but this is how drawindirect does it + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList,core::smart_refctd_ptr(binding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return drawMeshTasksIndirect_impl(binding, drawCount, stride); +} bool IGPUCommandBuffer::beginRenderPass(SRenderpassBeginInfo info, const SUBPASS_CONTENTS contents) { diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7c3f5dbb81..79f7f507d4 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -9,53 +9,53 @@ using namespace nbl::video; class SpirvTrimTask { - public: - using EntryPoints = core::set; - struct ShaderInfo - { - EntryPoints entryPoints; - const asset::IShader* trimmedShader; - }; +public: + using EntryPoints = core::set; + struct ShaderInfo + { + EntryPoints entryPoints; + const asset::IShader* trimmedShader; + }; - SpirvTrimTask(asset::ISPIRVEntryPointTrimmer* trimer, system::logger_opt_ptr logger) : m_trimmer(trimer), m_logger(logger) - { - - } + SpirvTrimTask(asset::ISPIRVEntryPointTrimmer* trimer, system::logger_opt_ptr logger) : m_trimmer(trimer), m_logger(logger) + { - void insertEntryPoint(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, const hlsl::ShaderStage stage) - { - const auto* shader = shaderSpec.shader; - auto it = m_shaderInfoMap.find(shader); - if (it == m_shaderInfoMap.end() || it->first != shader) - it = m_shaderInfoMap.emplace_hint(it, shader, ShaderInfo{ EntryPoints(), nullptr } ); - it->second.entryPoints.insert({ .name = shaderSpec.entryPoint, .stage = 
stage }); - } + } - IGPUPipelineBase::SShaderSpecInfo trim(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, core::vector>& outShaders) + void insertEntryPoint(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, const hlsl::ShaderStage stage) + { + const auto* shader = shaderSpec.shader; + auto it = m_shaderInfoMap.find(shader); + if (it == m_shaderInfoMap.end() || it->first != shader) + it = m_shaderInfoMap.emplace_hint(it, shader, ShaderInfo{ EntryPoints(), nullptr } ); + it->second.entryPoints.insert({ .name = shaderSpec.entryPoint, .stage = stage }); + } + + IGPUPipelineBase::SShaderSpecInfo trim(const IGPUPipelineBase::SShaderSpecInfo& shaderSpec, core::vector>& outShaders) + { + const auto* shader = shaderSpec.shader; + auto findResult = m_shaderInfoMap.find(shader); + assert(findResult != m_shaderInfoMap.end()); + const auto& entryPoints = findResult->second.entryPoints; + auto& trimmedShader = findResult->second.trimmedShader; + + auto trimmedShaderSpec = shaderSpec; + if (shader != nullptr) { - const auto* shader = shaderSpec.shader; - auto findResult = m_shaderInfoMap.find(shader); - assert(findResult != m_shaderInfoMap.end()); - const auto& entryPoints = findResult->second.entryPoints; - auto& trimmedShader = findResult->second.trimmedShader; - - auto trimmedShaderSpec = shaderSpec; - if (shader != nullptr) + if (trimmedShader == nullptr) { - if (trimmedShader == nullptr) - { - outShaders.push_back(m_trimmer->trim(shader, entryPoints, m_logger)); - trimmedShader = outShaders.back().get(); - } - trimmedShaderSpec.shader = trimmedShader; + outShaders.push_back(m_trimmer->trim(shader, entryPoints, m_logger)); + trimmedShader = outShaders.back().get(); } - return trimmedShaderSpec; + trimmedShaderSpec.shader = trimmedShader; } - - private: - core::map m_shaderInfoMap; - asset::ISPIRVEntryPointTrimmer* m_trimmer; - const system::logger_opt_ptr m_logger; + return trimmedShaderSpec; + } + +private: + core::map m_shaderInfoMap; + 
asset::ISPIRVEntryPointTrimmer* m_trimmer; + const system::logger_opt_ptr m_logger; }; ILogicalDevice::ILogicalDevice(core::smart_refctd_ptr&& api, const IPhysicalDevice* const physicalDevice, const SCreationParams& params, const bool runningInRenderdoc) @@ -511,23 +511,23 @@ bool ILogicalDevice::updateDescriptorSets(const std::spanvalidateWrite(write); switch (asset::IDescriptor::GetTypeCategory(*outCategory = writeValidationResults[i].type)) { - case asset::IDescriptor::EC_BUFFER: - params.bufferCount += writeCount; - break; - case asset::IDescriptor::EC_SAMPLER: - case asset::IDescriptor::EC_IMAGE: - params.imageCount += writeCount; - break; - case asset::IDescriptor::EC_BUFFER_VIEW: - params.bufferViewCount += writeCount; - break; - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - params.accelerationStructureCount += writeCount; - params.accelerationStructureWriteCount++; - break; - default: // validation failed - NBL_LOG_ERROR("Invalid descriptor type (descriptorWrites[%u])", i); - return false; + case asset::IDescriptor::EC_BUFFER: + params.bufferCount += writeCount; + break; + case asset::IDescriptor::EC_SAMPLER: + case asset::IDescriptor::EC_IMAGE: + params.imageCount += writeCount; + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + params.bufferViewCount += writeCount; + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + params.accelerationStructureCount += writeCount; + params.accelerationStructureWriteCount++; + break; + default: // validation failed + NBL_LOG_ERROR("Invalid descriptor type (descriptorWrites[%u])", i); + return false; } outCategory++; } @@ -591,23 +591,23 @@ bool ILogicalDevice::nullifyDescriptors(const std::span newParams(params.begin(), params.end()); const auto shaderCount = params.size(); - + core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling trimmedShaders.reserve(shaderCount); @@ -818,7 +818,7 @@ bool 
ILogicalDevice::createComputePipelines(IGPUPipelineCache* const pipelineCac } createComputePipelines_impl(pipelineCache,newParams,output,specConstantValidation); - + bool retval = true; for (auto i=0u; igetCreationParameters(); + const auto& subpass = passParams.subpasses[subpassIndex]; + if (subpass.viewMask) + { + /* + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 + if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) + return false; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 + if (!limits.multiviewGeomtryShader && .test(GEOMETRY)) + return false; + */ + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 + //NOTE: index of MSB must be less than maxMultiviewViewCount; wrong negation here, should be >= + if (hlsl::findMSB(subpass.viewMask) > limits.maxMultiviewViewCount) + { + NBL_LOG_ERROR("Invalid viewMask (params[%u])", subpassIndex); + return false; + } + } + if (subpass.depthStencilAttachment.render.used()) + { + const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 + if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) + sampleCountNeedsToMatch 
= true; + if (sampleCountNeedsToMatch && attachment.samples != samples) + { + NBL_LOG_ERROR("Depth stencil and rasterization samples need to match (params[%u])", subpassIndex); + return false; + } + } + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) + { + const auto& render = subpass.colorAttachments[i].render; + if (render.used()) + { + const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 + if (blendParams.blendParams[i].blendEnabled() && !formatUsages[attachment.format].attachmentBlend) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) + { + NBL_LOG_ERROR("Color attachment and rasterization samples need to match (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 + if (/*detect NV version && */(attachment.samples > samples)) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + } + } + + return true; +} + +//this is a COPY of graphics pipeline, with MINOR adjustments. 
+//no changes should be made DIRECTLY here +//UNLESS it's DIRECTLY for mesh/task +//there SHOULD be a function that duplicates functionality between graphics and mesh pipeline that can be adjusted first +bool ILogicalDevice::createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output +) { + std::fill_n(output, params.size(), nullptr); + SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache, params); + if (!specConstantValidation) { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + const auto& features = getEnabledFeatures(); + const auto& limits = getPhysicalDeviceLimits(); + + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + {return sum + param.getShaderCount(); } + ); + core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling + trimmedShaders.reserve(shaderCount); + + for (auto ix = 0u; ix < params.size(); ix++) + { + const auto& ci = params[ix]; + + if (params[ix].taskShader.shader != nullptr) { + if (!features.taskShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + } + + //check extensions here + //it SEEMS like createGraphicsPipeline does, but it does it in a weird way I don't understand? + //geo and tess are just flat disabled?? 
+ if (!features.meshShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + + auto renderpass = ci.renderpass; + if (!renderpass->wasCreatedBy(this)) { + NBL_LOG_ERROR("Invalid renderpass was given (params[%u])", ix); + return false; + } + + + MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); + + SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); + trimTask.insertEntryPoint(ci.taskShader, hlsl::ShaderStage::ESS_TASK); + trimTask.insertEntryPoint(ci.meshShader, hlsl::ShaderStage::ESS_MESH); + trimTask.insertEntryPoint(ci.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + newParams[ix].taskShader = trimTask.trim(ci.taskShader, trimmedShaders); + newParams[ix].meshShader = trimTask.trim(ci.meshShader, trimmedShaders); + newParams[ix].fragmentShader = trimTask.trim(ci.fragmentShader, trimmedShaders); + } + createMeshPipelines_impl(pipelineCache, newParams, output, specConstantValidation); + + for (auto i = 0u; i < params.size(); i++) + { + if (!output[i]) + { + NBL_LOG_ERROR("MeshPipeline was not created (params[%u])", i); + return false; + } + else + { + m_logger.log("shader[%d] mesh debug name - %s\n", nbl::system::ILogger::ELL_DEBUG, i, params[i].meshShader.shader->getDebugName()); + // TODO: set pipeline debug name thats a concatenation of all active stages' shader file path hints + } + } + return true; +} + bool ILogicalDevice::createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, @@ -851,9 +1017,9 @@ bool ILogicalDevice::createGraphicsPipelines( const auto& limits = getPhysicalDeviceLimits(); core::vector newParams(params.begin(), params.end()); const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + param.getShaderCount(); - }); + { + return sum + param.getShaderCount(); + }); 
core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling trimmedShaders.reserve(shaderCount); @@ -880,7 +1046,7 @@ bool ILogicalDevice::createGraphicsPipelines( NBL_LOG_ERROR("Cannot create IGPUShader for %p, Geometry Shader feature not enabled!", ci.geometryShader.shader); return false; } - + auto renderpass = ci.renderpass; if (!renderpass->wasCreatedBy(this)) { @@ -888,88 +1054,13 @@ bool ILogicalDevice::createGraphicsPipelines( return false; } - const auto& rasterParams = ci.cached.rasterization; - if (rasterParams.alphaToOneEnable && !features.alphaToOne) - { - NBL_LOG_ERROR("Feature `alpha to one` is not enabled"); - return false; - } - if (rasterParams.depthBoundsTestEnable && !features.depthBounds) - { - NBL_LOG_ERROR("Feature `depth bounds` is not enabled"); - return false; - } - - const auto samples = 0x1u << rasterParams.samplesLog2; - // TODO: loads more validation on extra parameters here! // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-lineRasterizationMode-02766 // TODO: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01505 // baiscally the AMD version must have the rasterization samples equal to the maximum of all attachment samples counts - const auto& passParams = renderpass->getCreationParameters(); - const auto& subpass = passParams.subpasses[ci.cached.subpassIx]; - if (subpass.viewMask) - { - /* - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 - if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) - return false; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 - if 
(!limits.multiviewGeomtryShader && .test(GEOMETRY)) - return false; - */ - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 - //NOTE: index of MSB must be less than maxMultiviewViewCount; wrong negation here, should be >= - if (hlsl::findMSB(subpass.viewMask) > limits.maxMultiviewViewCount) - { - NBL_LOG_ERROR("Invalid viewMask (params[%u])", ix); - return false; - } - } - if (subpass.depthStencilAttachment.render.used()) - { - const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 - if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) - sampleCountNeedsToMatch = true; - if (sampleCountNeedsToMatch && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid depth stencil attachment (params[%u])", ix); - return false; - } - } - for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) - { - const auto& render = subpass.colorAttachments[i].render; - if (render.used()) - { - const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 - if (ci.cached.blend.blendParams[i].blendEnabled() && 
!getPhysicalDevice()->getImageFormatUsagesOptimalTiling()[attachment.format].attachmentBlend) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 - if (/*detect NV version && */(attachment.samples > samples)) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - } - } + MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); trimTask.insertEntryPoint(ci.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -977,7 +1068,7 @@ bool ILogicalDevice::createGraphicsPipelines( trimTask.insertEntryPoint(ci.tesselationEvaluationShader, hlsl::ShaderStage::ESS_TESSELLATION_EVALUATION); trimTask.insertEntryPoint(ci.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); trimTask.insertEntryPoint(ci.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - + newParams[ix].vertexShader = trimTask.trim(ci.vertexShader, trimmedShaders); newParams[ix].tesselationControlShader = trimTask.trim(ci.tesselationControlShader, trimmedShaders); newParams[ix].tesselationEvaluationShader = trimTask.trim(ci.tesselationEvaluationShader, trimmedShaders); @@ -1002,9 +1093,9 @@ bool ILogicalDevice::createGraphicsPipelines( return true; } -bool 
ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output) +bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output) { std::fill_n(output,params.size(),nullptr); SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache,params); @@ -1044,15 +1135,15 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03596 if (skipAABBs && !features.rayTraversalPrimitiveCulling) { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); - return false; + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_AABBS"); + return false; } // https://registry.khronos.org/vulkan/specs/latest/html/vkspec.html#VUID-VkRayTracingPipelineCreateInfoKHR-rayTraversalPrimitiveCulling-03597 if (skipBuiltin && !features.rayTraversalPrimitiveCulling) { - NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); - return false; + NBL_LOG_ERROR("Feature `rayTraversalPrimitiveCulling` is not enabled when pipeline is created with SKIP_BUILT_IN_PRIMITIVES"); + return false; } } @@ -1061,17 +1152,17 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling const auto missGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.misses.size()); - }); + { + return sum + static_cast(param.shaderGroups.misses.size()); 
+ }); const auto hitGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.hits.size()); - }); + { + return sum + static_cast(param.shaderGroups.hits.size()); + }); const auto callableGroupCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) - { - return sum + static_cast(param.shaderGroups.callables.size()); - }); + { + return sum + static_cast(param.shaderGroups.callables.size()); + }); core::vector trimmedMissSpecs(missGroupCount); @@ -1084,7 +1175,7 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline const auto& limits = getPhysicalDeviceLimits(); for (auto ix = 0u; ix < params.size(); ix++) { - + const auto& param = params[ix]; // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-maxPipelineRayRecursionDepth-03589 @@ -1137,7 +1228,7 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } createRayTracingPipelines_impl(pipelineCache, newParams,output,specConstantValidation); - + bool retval = true; for (auto i=0u; i Date: Wed, 25 Feb 2026 13:37:53 -0600 Subject: [PATCH 2/3] first response commit there will be another commit. 
I renamed meshgraphicscommon to rasterizationpipelinecommon i added mesh pipelines to cassetconverter --- include/nbl/asset/ICPUGraphicsPipeline.h | 14 +- include/nbl/asset/ICPUMeshPipeline.h | 34 +- include/nbl/asset/IMeshPipeline.h | 49 ++ include/nbl/video/IGPUCommandBuffer.h | 29 +- include/nbl/video/IGPUGraphicsPipeline.h | 2 +- include/nbl/video/IGPUMeshPipeline.h | 41 +- include/nbl/video/ILogicalDevice.h | 6 + include/nbl/video/utilities/CAssetConverter.h | 10 +- src/nbl/video/CVulkanCommandBuffer.cpp | 9 + src/nbl/video/CVulkanCommandBuffer.h | 8 +- src/nbl/video/CVulkanLogicalDevice.cpp | 50 +- src/nbl/video/CVulkanMeshPipeline.cpp | 35 +- src/nbl/video/CVulkanPhysicalDevice.cpp | 16 +- src/nbl/video/IGPUCommandBuffer.cpp | 94 ++- src/nbl/video/ILogicalDevice.cpp | 106 ++- src/nbl/video/utilities/CAssetConverter.cpp | 686 ++++++++++++------ 16 files changed, 747 insertions(+), 442 deletions(-) create mode 100644 include/nbl/asset/IMeshPipeline.h diff --git a/include/nbl/asset/ICPUGraphicsPipeline.h b/include/nbl/asset/ICPUGraphicsPipeline.h index acc990f18c..45fb19fe69 100644 --- a/include/nbl/asset/ICPUGraphicsPipeline.h +++ b/include/nbl/asset/ICPUGraphicsPipeline.h @@ -53,21 +53,21 @@ class ICPUGraphicsPipeline final : public ICPUPipeline getSpecInfo(const hlsl::ShaderStage stage) { - if (!isMutable()) return nullptr; + if (!isMutable()) return {}; const auto stageIndex = stageToIndex(stage); if (stageIndex != -1) - return &m_specInfos[stageIndex]; - return nullptr; + return {&m_specInfos[stageIndex], 1}; + return {}; } - const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const + std::span getSpecInfo(const hlsl::ShaderStage stage) const { const auto stageIndex = stageToIndex(stage); if (stageIndex != -1) - return &m_specInfos[stageIndex]; - return nullptr; + return {&m_specInfos[stageIndex], 1}; + return {}; } inline bool valid() const override diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h 
index 1b48ed06f6..b4668d786f 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -52,23 +52,25 @@ class ICPUMeshPipeline final : public ICPUPipeline getSpecInfo(const hlsl::ShaderStage stage) { - if (!isMutable()) return nullptr; + if (!isMutable()) return {}; switch (stage) { - case hlsl::ShaderStage::ESS_TASK: return &m_specInfos[0]; - case hlsl::ShaderStage::ESS_MESH: return &m_specInfos[1]; - case hlsl::ShaderStage::ESS_FRAGMENT: return &m_specInfos[2]; + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; } - return nullptr; + return {}; } - const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const + std::span getSpecInfo(const hlsl::ShaderStage stage) const { - const auto stageIndex = stageToIndex(stage); - if (stageIndex != -1) - return &m_specInfos[stageIndex]; - return nullptr; + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; + } + return {}; } inline bool valid() const override @@ -102,10 +104,12 @@ class ICPUMeshPipeline final : public ICPUPipeline= MESH_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) - return -1; - return stageIx; + switch(stage){ + case hlsl::ShaderStage::ESS_TASK: return 0; + case hlsl::ShaderStage::ESS_MESH: return 1; + case hlsl::ShaderStage::ESS_FRAGMENT: return 2; + } + return -1; } static inline hlsl::ShaderStage indexToStage(const int8_t index) diff --git a/include/nbl/asset/IMeshPipeline.h b/include/nbl/asset/IMeshPipeline.h new file mode 100644 index 0000000000..8fa850225d --- /dev/null +++ b/include/nbl/asset/IMeshPipeline.h @@ -0,0 +1,49 @@ +#ifndef _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_ + +#include 
"nbl/asset/IShader.h" +#include "nbl/asset/RasterizationStates.h" +#include "nbl/asset/IPipeline.h" + + +namespace nbl::asset { + class IMeshPipelineBase : public virtual core::IReferenceCounted { + public: + constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u; + struct SCachedCreationParams final { + SRasterizationParams rasterization = {}; + SBlendParams blend = {}; + uint32_t subpassIx = 0u; + uint8_t requireFullSubgroups = false; + }; + + }; + + template + class IMeshPipeline : public IPipeline, public IMeshPipelineBase { + protected: + using renderpass_t = RenderpassType; + public: + inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } + inline const renderpass_t* getRenderpass() const {return m_renderpass.get();} + + static inline bool hasRequiredStages(const core::bitflag& stagePresence) + { + return stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH); + } + + protected: + explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) : + IPipeline(core::smart_refctd_ptr(layout)), + m_params(cachedParams), m_renderpass(core::smart_refctd_ptr(renderpass)) + { + } + + SCachedCreationParams m_params = {}; + core::smart_refctd_ptr m_renderpass = nullptr; + }; + +} + + +#endif \ No newline at end of file diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 63552efa20..fa2520b1ae 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -436,19 +436,20 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject ); //! 
dispatches - bool dispatch(const uint32_t groupCountX, const uint32_t groupCountY=1, const uint32_t groupCountZ=1); - template requires std::is_integral_v - bool dispatch(const hlsl::vector groupCount) - { - return dispatch(groupCount.x,groupCount.y,groupCount.z); + bool dispatch(const hlsl::vector groupCount); + inline bool dispatch(const uint32_t groupCountX, const uint32_t groupCountY=1, const uint32_t groupCountZ=1) + { + return dispatch(hlsl::vector{groupCountX, groupCountY, groupCountZ}); } bool dispatchIndirect(const asset::SBufferBinding& binding); - bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1); - inline bool drawMeshTasks(const hlsl::vector groupCount) { - return drawMeshTasks(groupCount.x, groupCount.y, groupCount.z); + bool drawMeshTasks(const hlsl::vector groupCount); + inline bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1) + { + return drawMeshTasks(hlsl::vector{groupCountX, groupCountY, groupCountZ}); } bool drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride); + bool drawMeshTasksIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride); //! 
Begin/End RenderPasses struct SRenderpassBeginInfo @@ -719,13 +720,15 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool draw_impl(const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance) = 0; virtual bool drawIndexed_impl(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex, const int32_t vertexOffset, const uint32_t firstInstance) = 0; + virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; + virtual bool drawIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; virtual bool drawIndexedIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool drawIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; virtual bool drawIndexedIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; - - virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; - virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool drawMeshTasksIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) = 0; virtual bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const 
IGPUSampler::E_TEXTURE_FILTER filter) = 0; virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; @@ -907,10 +910,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool invalidDynamic(const uint32_t first, const uint32_t count); - template requires nbl::is_any_of_v + template requires nbl::is_any_of_v bool invalidDrawIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride); - template requires nbl::is_any_of_v + template requires nbl::is_any_of_v bool invalidDrawIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride); core::smart_refctd_ptr* reserveReferences(const uint32_t size); diff --git a/include/nbl/video/IGPUGraphicsPipeline.h b/include/nbl/video/IGPUGraphicsPipeline.h index 79e1337787..880de909e4 100644 --- a/include/nbl/video/IGPUGraphicsPipeline.h +++ b/include/nbl/video/IGPUGraphicsPipeline.h @@ -44,7 +44,7 @@ class IGPUGraphicsPipeline : public IGPUPipeline=renderpass->getSubpassCount()) return {}; - // TODO: check rasterization samples, etc. + // TODO: check rasterization samples, etc. 
//when done, also add to IGPUMeshPipeline //rp->getCreationParameters().subpasses[i] core::bitflag stagePresence = {}; diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h index 7878704978..a7770ee8d3 100644 --- a/include/nbl/video/IGPUMeshPipeline.h +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -9,43 +9,12 @@ //related spec -//i feel like this MIGHT get stuffed into graphicspipeline but idk - /* https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264 -** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064 -* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322 -* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb execution mode -*** whats a xfb - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187 -* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability 
-https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188 -* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability -*** my understanding is nabla strictly controls it's extensions, so this shouldnt be an issue - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065 -* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the -* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE -*** this one seems the most relevant - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066 -* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the -* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067 -* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT - -https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720 -* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and -* VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader - +* If the pipeline requires 
pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated +* with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths -* theres 1 or 2 more about pipeline libraries, but im not going to worry about that +* theres a few more about pipeline libraries that aren't included */ namespace nbl::video @@ -81,7 +50,7 @@ namespace nbl::video if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount()) return {}; - // TODO: check rasterization samples, etc. + // TODO: check rasterization samples, etc. (sourced from IGPUGraphicsPipeline) //rp->getCreationParameters().subpasses[i] core::bitflag stagePresence = {}; @@ -137,7 +106,7 @@ namespace nbl::video inline uint32_t getShaderCount() const { - uint32_t count = 0; //count = 2 and only check task shader?? + uint32_t count = 0; count += (taskShader.shader != nullptr); count += (meshShader.shader != nullptr); count += (fragmentShader.shader != nullptr); diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 19d44b2486..37a5565089 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -1345,6 +1345,12 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe void addCommonShaderDefines(const bool runningInRenderDoc); + bool createRasterizationPipelineCommonValidation( + const IGPURenderpass* renderpass, uint8_t subpassIndex, + nbl::asset::SRasterizationParams const& rasterParams, + nbl::asset::SBlendParams const& blendParams + ); + inline bool invalidAllocationForBind(const IDeviceMemoryBacked* resource, const IDeviceMemoryBacked::SMemoryBinding& binding, const size_t alignment) { if (!resource->wasCreatedBy(this)) diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h index a360e3b0f5..65c6008472 100644 --- 
a/include/nbl/video/utilities/CAssetConverter.h +++ b/include/nbl/video/utilities/CAssetConverter.h @@ -47,10 +47,11 @@ class CAssetConverter : public core::IReferenceCounted asset::ICPUDescriptorSetLayout, asset::ICPUPipelineLayout, asset::ICPUPipelineCache, - asset::ICPUComputePipeline, - asset::ICPURayTracingPipeline, asset::ICPURenderpass, asset::ICPUGraphicsPipeline, + asset::ICPUComputePipeline, + asset::ICPUMeshPipeline, + asset::ICPURayTracingPipeline, asset::ICPUDescriptorSet, //asset::ICPUFramebuffer doesn't exist yet XD asset::ICPUPolygonGeometry @@ -690,10 +691,11 @@ class CAssetConverter : public core::IReferenceCounted bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); - bool operator()(lookup_t); - bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); + bool operator()(lookup_t); + bool operator()(lookup_t); + bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); }; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index f33966588e..047e823abc 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -789,6 +789,15 @@ bool CVulkanCommandBuffer::drawIndexedIndirectCount_impl(const asset::SBufferBin return true; } +bool CVulkanCommandBuffer::drawMeshTasksIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) +{ + getFunctionTable().vkCmdDrawMeshTasksIndirectCountEXT( + m_cmdbuf,static_cast(indirectBinding.buffer.get())->getInternalObject(),indirectBinding.offset, + static_cast(countBinding.buffer.get())->getInternalObject(),countBinding.offset,maxDrawCount,stride + ); + return true; +} + bool CVulkanCommandBuffer::blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span 
regions, const IGPUSampler::E_TEXTURE_FILTER filter) { VkImage vk_srcImage = static_cast(srcImage)->getInternalObject(); diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index ba3925ffe2..541c799cba 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -210,9 +210,6 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool dispatch_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; bool dispatchIndirect_impl(const asset::SBufferBinding& binding) override; - bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; - bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; - bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) override; bool nextSubpass_impl(const SUBPASS_CONTENTS contents) override; bool endRenderPass_impl() override; @@ -221,10 +218,15 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool draw_impl(const uint32_t vertexCount, const uint32_t instanceCount, const uint32_t firstVertex, const uint32_t firstInstance) override; bool drawIndexed_impl(const uint32_t indexCount, const uint32_t instanceCount, const uint32_t firstIndex, const int32_t vertexOffset, const uint32_t firstInstance) override; + bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; + bool drawIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; bool drawIndexedIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; + bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; + bool drawIndirectCount_impl(const 
asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) override; bool drawIndexedIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) override; + bool drawMeshTasksIndirectCount_impl(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) override; bool blitImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const std::span regions, const IGPUSampler::E_TEXTURE_FILTER filter) override; bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) override; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 34a24d30d7..3df77e5634 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1280,7 +1280,7 @@ void PopulateColorBlend( } template -void PopulateMeshGraphicsCommonData( +void PopulateRasterizationPipelineCommonData( const std::span createInfos, core::vector& vk_createInfos, @@ -1346,19 +1346,16 @@ core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures con } // TODO: VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT + /* + specs on dynamic state with mesh pipelines, notes for the future + https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065 + 
https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066 + https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067 + */ + return ret; } -//maximum cleanliness,i tried it and im not a big fan -//struct CommonPipelineStruct { -// VkPipelineRasterizationStateCreateInfo vk_rasterizationStates{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }; -// VkPipelineMultisampleStateCreateInfo vk_multisampleStates{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }; -// VkPipelineDepthStencilStateCreateInfo vk_depthStencilStates{ VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }; -// VkPipelineColorBlendStateCreateInfo vk_colorBlendStates{ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }; -// core::vector vk_colorBlendAttachmentStates{ IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments }; -//}; - - void CVulkanLogicalDevice::createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, @@ -1396,7 +1393,7 @@ void CVulkanLogicalDevice::createMeshPipelines_impl( .pScissors = nullptr, }); - PopulateMeshGraphicsCommonData( + PopulateRasterizationPipelineCommonData( createInfos, vk_createInfos, vk_viewportStates, @@ -1409,12 +1406,16 @@ void CVulkanLogicalDevice::createMeshPipelines_impl( vk_dynamicStates, vk_dynamicStateCreateInfo ); - //not used in mesh pipelines + /* + not used in mesh pipelines + + shoudl already be nullptr, leaving the comment for clarity for (auto& outCreateInfo : vk_createInfos) { outCreateInfo.pVertexInputState = nullptr; outCreateInfo.pInputAssemblyState = nullptr; outCreateInfo.pTessellationState = nullptr; } + */ auto outCreateInfo = vk_createInfos.data(); const auto maxShaderStages = createInfos.size() * 
IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; @@ -1526,17 +1527,19 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( .dynamicStateCount = static_cast(vk_dynamicStates.size()), .pDynamicStates = vk_dynamicStates.data() }; - core::vector vk_viewportStates(createInfos.size(), { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway - .flags = 0, // must be 0 - .viewportCount = 0, - .pViewports = nullptr, - .scissorCount = 0, - .pScissors = nullptr, - }); + core::vector vk_viewportStates(createInfos.size(), + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + } + ); - PopulateMeshGraphicsCommonData( + PopulateRasterizationPipelineCommonData( createInfos, vk_createInfos, vk_viewportStates, @@ -1749,6 +1752,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( size_t maxShaderGroups = 0; for (const auto& info : createInfos) maxShaderGroups += info.shaderGroups.getShaderGroupCount(); + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,nullptr }); core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); core::vector entryPoints(maxShaderStages); diff --git a/src/nbl/video/CVulkanMeshPipeline.cpp b/src/nbl/video/CVulkanMeshPipeline.cpp index 8fa3cc63eb..b28bb9cc76 100644 --- a/src/nbl/video/CVulkanMeshPipeline.cpp +++ b/src/nbl/video/CVulkanMeshPipeline.cpp @@ -4,24 +4,23 @@ namespace nbl::video { +CVulkanMeshPipeline::~CVulkanMeshPipeline() +{ + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), 
m_vkPipeline, nullptr); +} +void CVulkanMeshPipeline::setObjectDebugName(const char* label) const +{ + IBackendObject::setObjectDebugName(label); - CVulkanMeshPipeline::~CVulkanMeshPipeline() - { - const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); - auto* vk = vulkanDevice->getFunctionTable(); - vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); - } - void CVulkanMeshPipeline::setObjectDebugName(const char* label) const - { - IBackendObject::setObjectDebugName(label); - - if (vkSetDebugUtilsObjectNameEXT == 0) return; + if (vkSetDebugUtilsObjectNameEXT == 0) return; - const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); - VkDebugUtilsObjectNameInfoEXT nameInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr }; - nameInfo.objectType = VK_OBJECT_TYPE_PIPELINE; - nameInfo.objectHandle = reinterpret_cast(getInternalObject()); - nameInfo.pObjectName = getObjectDebugName(); - vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); - } + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDebugUtilsObjectNameInfoEXT nameInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr }; + nameInfo.objectType = VK_OBJECT_TYPE_PIPELINE; + nameInfo.objectHandle = reinterpret_cast(getInternalObject()); + nameInfo.pObjectName = getObjectDebugName(); + vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); +} } \ No newline at end of file diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 163bd93026..69958ddb0b 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -829,8 +829,8 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { features.meshShader = meshShaderFeatures.meshShader; features.taskShader = meshShaderFeatures.taskShader; + 
//properties.limits.multiviewMeshShader = meshShaderFeatures.multiviewMeshShader; //TODO - //VkBool32 multiviewMeshShader; //VkBool32 primitiveFragmentShadingRateMeshShader; //VkBool32 meshShaderQueries; @@ -963,12 +963,12 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart properties.limits.shaderSharedInt64Atomics = vulkan12Features.shaderSharedInt64Atomics; properties.limits.shaderFloat16 = vulkan12Features.shaderFloat16; - if (!vulkan12Features.shaderFloat16) - { - // only fail if 16bit floats can be used - if (!vulkan12Properties.shaderSignedZeroInfNanPreserveFloat16) - RETURN_NULL_PHYSICAL_DEVICE; - } + if (!vulkan12Features.shaderFloat16) + { + // only fail if 16bit floats can be used + if (!vulkan12Properties.shaderSignedZeroInfNanPreserveFloat16) + RETURN_NULL_PHYSICAL_DEVICE; + } if (!vulkan12Features.shaderInt8) RETURN_NULL_PHYSICAL_DEVICE; @@ -1872,7 +1872,7 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic meshShaderFeatures.meshShader = enabledFeatures.meshShader; meshShaderFeatures.primitiveFragmentShadingRateMeshShader = VK_FALSE;//needs to be explicitly set? 
meshShaderFeatures.meshShaderQueries = VK_FALSE; - meshShaderFeatures.multiviewMeshShader = VK_FALSE; + //meshShaderFeatures.multiviewMeshShader = limits.multiviewMeshShader; //shaderClockFeatures [LIMIT SO ENABLE EVERYTHING BY DEFAULT] diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1807829b1b..9176c6b962 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1448,26 +1448,26 @@ bool IGPUCommandBuffer::copyQueryPoolResults( } -bool IGPUCommandBuffer::dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) +bool IGPUCommandBuffer::dispatch(const hlsl::vector groupCount) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; - if (groupCountX==0 || groupCountY==0 || groupCountZ==0) + if (groupCount.x==0 || groupCount.y==0 || groupCount.z==0) { - NBL_LOG_ERROR("invalid group counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ); + NBL_LOG_ERROR("invalid group counts (%d, %d, %d)!", groupCount.x, groupCount.y, groupCount.z); return false; } const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); - if (groupCountX>limits.maxComputeWorkGroupCount[0] || groupCountY>limits.maxComputeWorkGroupCount[1] || groupCountZ>limits.maxComputeWorkGroupCount[2]) + if (groupCount.x>limits.maxComputeWorkGroupCount[0] || groupCount.y>limits.maxComputeWorkGroupCount[1] || groupCount.z>limits.maxComputeWorkGroupCount[2]) { - NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ, limits.maxComputeWorkGroupCount[0], limits.maxComputeWorkGroupCount[1], limits.maxComputeWorkGroupCount[2]); + NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCount.x, groupCount.y, groupCount.z, limits.maxComputeWorkGroupCount[0], limits.maxComputeWorkGroupCount[1], limits.maxComputeWorkGroupCount[2]); return false; } 
m_noCommands = false; - return dispatch_impl(groupCountX,groupCountY,groupCountZ); + return dispatch_impl(groupCount.x,groupCount.y,groupCount.z); } bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBinding& binding) @@ -1488,58 +1488,24 @@ bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBinding groupCount) { if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT, RENDERPASS_SCOPE::INSIDE)) return false; - if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0) + if (groupCount.x == 0 || groupCount.y == 0 || groupCount.z == 0) { - NBL_LOG_ERROR("invalid group counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ); + NBL_LOG_ERROR("invalid group counts (%d, %d, %d)!", groupCount.x, groupCount.y, groupCount.z); return false; } const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); - if (groupCountX > limits.maxMeshWorkGroupCount[0] || groupCountY > limits.maxMeshWorkGroupCount[1] || groupCountZ > limits.maxMeshWorkGroupCount[2]) - { - NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ, limits.maxMeshWorkGroupCount[0], limits.maxMeshWorkGroupCount[1], limits.maxMeshWorkGroupCount[2]); + if (groupCount.x > limits.maxMeshWorkGroupCount[0] || groupCount.y > limits.maxMeshWorkGroupCount[1] || groupCount.z > limits.maxMeshWorkGroupCount[2]) { + NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCount.x, groupCount.y, groupCount.z, limits.maxMeshWorkGroupCount[0], limits.maxMeshWorkGroupCount[1], limits.maxMeshWorkGroupCount[2]); return false; } m_noCommands = false; - return drawMeshTasks_impl(groupCountX, groupCountY, groupCountZ); -} - -bool IGPUCommandBuffer::drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride) -{ - if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::INSIDE)) - return false; - if (invalidBufferBinding(binding,4u/*TODO: is 
it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)){ - return false; - } - - if (drawCount) { - if (drawCount==1u) - stride = sizeof(hlsl::DrawMeshTasksIndirectCommand_t); - if (stride&0x3u || stride getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount) { - NBL_LOG_ERROR("draw count (%d) exceeds maximum allowed amount (%d)!", drawCount, getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount); - return false; - } - if (invalidBufferRange({ binding.offset,stride * (drawCount - 1u) + sizeof(hlsl::DrawMeshTasksIndirectCommand_t),binding.buffer }, alignof(uint32_t), IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) - return false; - } // i get the feeling the vk command shouldnt be called if drawCount is 0, but this is how drawindirect does it - - if (!m_cmdpool->m_commandListPool.emplace(m_commandList,core::smart_refctd_ptr(binding.buffer))) - { - NBL_LOG_ERROR("out of host memory!"); - return false; - } - - m_noCommands = false; - return drawMeshTasksIndirect_impl(binding, drawCount, stride); + return drawMeshTasks_impl(groupCount.x, groupCount.y, groupCount.z); } bool IGPUCommandBuffer::beginRenderPass(SRenderpassBeginInfo info, const SUBPASS_CONTENTS contents) @@ -1740,7 +1706,7 @@ bool IGPUCommandBuffer::drawIndexed(const uint32_t indexCount, const uint32_t in return drawIndexed_impl(indexCount,instanceCount,firstIndex,vertexOffset,firstInstance); } -template requires nbl::is_any_of_v +template requires nbl::is_any_of_v bool IGPUCommandBuffer::invalidDrawIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride) { if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::INSIDE)) @@ -1767,8 +1733,9 @@ bool IGPUCommandBuffer::invalidDrawIndirect(const asset::SBufferBinding(const asset::SBufferBinding&, const uint32_t, uint32_t); template NBL_API2 bool IGPUCommandBuffer::invalidDrawIndirect(const asset::SBufferBinding&, const uint32_t, uint32_t); +template NBL_API2 bool 
IGPUCommandBuffer::invalidDrawIndirect(const asset::SBufferBinding&, const uint32_t, uint32_t); -template requires nbl::is_any_of_v +template requires nbl::is_any_of_v bool IGPUCommandBuffer::invalidDrawIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) { if (!getOriginDevice()->getPhysicalDevice()->getLimits().drawIndirectCount) @@ -1786,6 +1753,7 @@ bool IGPUCommandBuffer::invalidDrawIndirectCount(const asset::SBufferBinding(const asset::SBufferBinding&, const asset::SBufferBinding&, const uint32_t, const uint32_t); template NBL_API2 bool IGPUCommandBuffer::invalidDrawIndirectCount(const asset::SBufferBinding&, const asset::SBufferBinding&, const uint32_t, const uint32_t); +template NBL_API2 bool IGPUCommandBuffer::invalidDrawIndirectCount(const asset::SBufferBinding&, const asset::SBufferBinding&, const uint32_t, const uint32_t); bool IGPUCommandBuffer::drawIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) { @@ -1817,6 +1785,21 @@ bool IGPUCommandBuffer::drawIndexedIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, uint32_t stride) +{ + if (invalidDrawIndirect(binding,drawCount,stride)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList,core::smart_refctd_ptr(binding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return drawMeshTasksIndirect_impl(binding, drawCount, stride); +} + bool IGPUCommandBuffer::drawIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride) { if (!invalidDrawIndirectCount(indirectBinding,countBinding,maxDrawCount,stride)) @@ -1847,6 +1830,21 @@ bool IGPUCommandBuffer::drawIndexedIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t 
maxDrawCount, const uint32_t stride) +{ + if (!invalidDrawIndirectCount(indirectBinding,countBinding,maxDrawCount,stride)) + return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(indirectBinding.buffer), core::smart_refctd_ptr(countBinding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return drawMeshTasksIndirectCount_impl(indirectBinding, countBinding, maxDrawCount, stride); +} + /* bool IGPUCommandBuffer::drawMeshBuffer(const IGPUMeshBuffer* const meshBuffer) { diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 79f7f507d4..374388366a 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -833,13 +833,18 @@ bool ILogicalDevice::createComputePipelines(IGPUPipelineCache* const pipelineCac return retval; } -bool MeshGraphicsCommonValidation( +bool ILogicalDevice::createRasterizationPipelineCommonValidation( const IGPURenderpass* renderpass, uint8_t subpassIndex, - SPhysicalDeviceLimits const& limits, SPhysicalDeviceFeatures const& features, - nbl::asset::SRasterizationParams const& rasterParams, nbl::asset::SBlendParams const& blendParams, - const system::logger_opt_ptr m_logger, - const IPhysicalDevice::SFormatImageUsages& formatUsages + nbl::asset::SRasterizationParams const& rasterParams, nbl::asset::SBlendParams const& blendParams ) { + if (!renderpass->wasCreatedBy(this)) { + NBL_LOG_ERROR("Invalid renderpass was given"); + return false; + } + + const auto& features = getEnabledFeatures(); + const auto& limits = getPhysicalDeviceLimits(); + if (rasterParams.alphaToOneEnable && !features.alphaToOne) { NBL_LOG_ERROR("Feature `alpha to one` is not enabled"); @@ -894,7 +899,7 @@ bool MeshGraphicsCommonValidation( { const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 - if (blendParams.blendParams[i].blendEnabled() && !formatUsages[attachment.format].attachmentBlend) + if (blendParams.blendParams[i].blendEnabled() && !getPhysicalDevice()->getImageFormatUsagesOptimalTiling()[attachment.format].attachmentBlend) { NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); return false; @@ -918,10 +923,7 @@ bool MeshGraphicsCommonValidation( return true; } -//this is a COPY of graphics pipeline, with MINOR adjustments. -//no changes should be made DIRECTLY here -//UNLESS it's DIRECTLY for mesh/task -//there SHOULD be a function that duplicates functionality between graphics and mesh pipeline that can be adjusted first +//attempt to put changes in createRasterizationPipelineCommonValidation first bool ILogicalDevice::createMeshPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, @@ -947,31 +949,38 @@ bool ILogicalDevice::createMeshPipelines( for (auto ix = 0u; ix < params.size(); ix++) { const auto& ci = params[ix]; + const auto& passParams = ci.renderpass->getCreationParameters(); + const auto& subpass = passParams.subpasses[ci.cached.subpassIx]; - if (params[ix].taskShader.shader != nullptr) { + if (ci.taskShader.shader != nullptr) { if (!features.taskShader) { - NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + NBL_LOG_ERROR("Feature `task shader` is not enabled"); return false; } } - - //check extensions here - //it SEEMS like createGraphicsPipeline does, but it does it in a weird way I don't understand? - //geo and tess are just flat disabled?? 
+ + //a mesh shader is required in mesh pipelines if (!features.meshShader) { NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); return false; } + /* MOMENTARILY DISABLED, FIX BEFORE MERGING + if(subpass.viewMask){ + + //https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064 + //https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720 + if(!limits.multiviewMeshShader){ + NBL_LOG_ERROR("Feature 'multiviewMeshShader' is not enabled"); + return false; + } + } + */ - auto renderpass = ci.renderpass; - if (!renderpass->wasCreatedBy(this)) { - NBL_LOG_ERROR("Invalid renderpass was given (params[%u])", ix); + if (!createRasterizationPipelineCommonValidation(ci.renderpass, ci.cached.subpassIx, ci.cached.rasterization, ci.cached.blend)) { + NBL_LOG_ERROR("failed common rasterization pipline validation from mesh [%d]", ix); return false; } - - MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); - SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); trimTask.insertEntryPoint(ci.taskShader, hlsl::ShaderStage::ESS_TASK); trimTask.insertEntryPoint(ci.meshShader, hlsl::ShaderStage::ESS_MESH); @@ -1026,32 +1035,54 @@ bool ILogicalDevice::createGraphicsPipelines( for (auto ix = 0u; ix < params.size(); ix++) { const auto& ci = params[ix]; + const auto& passParams = ci.renderpass->getCreationParameters(); + const auto& subpass = passParams.subpasses[ci.cached.subpassIx]; // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00704 // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPipelineShaderStageCreateInfo.html#VUID-VkPipelineShaderStageCreateInfo-stage-00705 if (ci.tesselationControlShader.shader) { - NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", ci.tesselationControlShader.shader); - return false; + if (!features.tessellationShader) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", ci.tesselationControlShader.shader); + return false; + } + if (subpass.viewMask){ + if(!limits.multiviewTessellationShader){ + NBL_LOG_ERROR("Feature 'multiviewTessellationShader' is not enabled"); + return false; + } + } } if (ci.tesselationEvaluationShader.shader) { - NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", ci.tesselationEvaluationShader.shader); - return false; + if (!features.tessellationShader) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Tessellation Shader feature not enabled!", ci.tesselationEvaluationShader.shader); + return false; + } + if (subpass.viewMask){ + if(!limits.multiviewTessellationShader){ + NBL_LOG_ERROR("Feature 'multiviewTessellationShader' is not enabled"); + return false; + } + } } if (ci.geometryShader.shader) { - NBL_LOG_ERROR("Cannot create IGPUShader for %p, Geometry Shader feature not enabled!", ci.geometryShader.shader); - return false; - } - - auto renderpass = ci.renderpass; - if (!renderpass->wasCreatedBy(this)) - { - NBL_LOG_ERROR("Invalid renderpass was given (params[%u])", ix); - return false; + if(!features.geometryShader) + { + NBL_LOG_ERROR("Cannot create IGPUShader for %p, Geometry Shader feature not enabled!", ci.geometryShader.shader); + return false; + } + if (subpass.viewMask){ + if(!limits.multiviewGeometryShader){ + NBL_LOG_ERROR("Feature 'multiviewGeometryShader' is not enabled"); + return false; + } + } } // TODO: loads more validation on extra parameters here! 
@@ -1060,7 +1091,10 @@ bool ILogicalDevice::createGraphicsPipelines( // TODO: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01505 // baiscally the AMD version must have the rasterization samples equal to the maximum of all attachment samples counts - MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling()); + if (!createRasterizationPipelineCommonValidation(ci.renderpass, ci.cached.subpassIx, ci.cached.rasterization, ci.cached.blend)) { + NBL_LOG_ERROR("failed common rasterization pipline validation from graphics [%d]", ix); + return false; + } SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); trimTask.insertEntryPoint(ci.vertexShader, hlsl::ShaderStage::ESS_VERTEX); diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 69346a4049..4e47292b12 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -531,66 +531,108 @@ class AssetVisitor : public CRTP } return true; } - inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) + inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) { const auto* asset = instance.asset; const auto* layout = asset->getLayout(); if (!layout || !descend(layout,{layout})) return false; + const auto* rpass = asset->getRenderpass(); + if (!rpass || !descend(rpass,{rpass})) + return false; + using stage_t = hlsl::ShaderStage; + for (stage_t stage : {stage_t::ESS_VERTEX,stage_t::ESS_TESSELLATION_CONTROL,stage_t::ESS_TESSELLATION_EVALUATION,stage_t::ESS_GEOMETRY,stage_t::ESS_FRAGMENT}) + { + const auto& specInfo = asset->getSpecInfo(stage); + if (specInfo.size() > 0) { + const auto* shader = specInfo[0].shader.get(); //span.size() <= 1 + if 
(!shader) + { + if (stage == stage_t::ESS_VERTEX) // required + return false; + CRTP::template nullOptional(); + continue; + } + if (!descend(shader, { shader }, specInfo[0], stage)) + return false; + } + else { + if (stage == stage_t::ESS_VERTEX) // required + return false; + CRTP::template nullOptional(); + continue; + } + } + return true; + } + inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) + { + const auto* asset = instance.asset; + const auto* layout = asset->getLayout(); + if (!layout || !descend(layout, { layout })) + return false; const auto& specInfo = asset->getSpecInfo(); const auto* shader = specInfo.shader.get(); - if (!shader || !descend(shader,{shader},specInfo, hlsl::ESS_COMPUTE)) + if (!shader || !descend(shader, { shader }, specInfo, hlsl::ESS_COMPUTE)) return false; return true; } - inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) + inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) { const auto* asset = instance.asset; const auto* layout = asset->getLayout(); - if (!layout || !descend(layout,{layout})) + if (!layout || !descend(layout, { layout })) + return false; + const auto* rpass = asset->getRenderpass(); + if (!rpass || !descend(rpass, { rpass })) return false; using stage_t = hlsl::ShaderStage; - for (stage_t stage : {hlsl::ShaderStage::ESS_RAYGEN, hlsl::ShaderStage::ESS_MISS, hlsl::ShaderStage::ESS_ANY_HIT, hlsl::ShaderStage::ESS_CLOSEST_HIT, hlsl::ShaderStage::ESS_INTERSECTION, hlsl::ShaderStage::ESS_CALLABLE}) + for (stage_t stage : {stage_t::ESS_TASK, stage_t::ESS_MESH, stage_t::ESS_FRAGMENT}) { - const auto& specInfos = asset->getSpecInfos(stage); - for (auto specInfo_i = 0; specInfo_i < specInfos.size(); specInfo_i++) - { - const auto& specInfo = specInfos[specInfo_i]; - const auto* shader = specInfo.shader.get(); + const auto& specInfo = asset->getSpecInfo(stage); + if (specInfo.size() > 0) { + const auto* shader = 
specInfo[0].shader.get(); //span.size() <= 1 if (!shader) { - if (stage == stage_t::ESS_RAYGEN) return false; + if (stage == stage_t::ESS_MESH) // required + return false; CRTP::template nullOptional(); continue; } - if (!descend(shader,{shader}, specInfo, stage, specInfo_i)) return false; + if (!descend(shader, { shader }, specInfo[0], stage)) + return false; + } + else { + if (stage == stage_t::ESS_MESH) // required + return false; + CRTP::template nullOptional(); + continue; } } return true; } - inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) + inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) { const auto* asset = instance.asset; const auto* layout = asset->getLayout(); - if (!layout || !descend(layout,{layout})) - return false; - const auto* rpass = asset->getRenderpass(); - if (!rpass || !descend(rpass,{rpass})) + if (!layout || !descend(layout, { layout })) return false; using stage_t = hlsl::ShaderStage; - for (stage_t stage : {stage_t::ESS_VERTEX,stage_t::ESS_TESSELLATION_CONTROL,stage_t::ESS_TESSELLATION_EVALUATION,stage_t::ESS_GEOMETRY,stage_t::ESS_FRAGMENT}) + for (stage_t stage : {hlsl::ShaderStage::ESS_RAYGEN, hlsl::ShaderStage::ESS_MISS, hlsl::ShaderStage::ESS_ANY_HIT, hlsl::ShaderStage::ESS_CLOSEST_HIT, hlsl::ShaderStage::ESS_INTERSECTION, hlsl::ShaderStage::ESS_CALLABLE}) { - const auto& specInfo = *asset->getSpecInfo(stage); - const auto* shader = specInfo.shader.get(); - if (!shader) + const auto& specInfos = asset->getSpecInfos(stage); + for (auto specInfo_i = 0; specInfo_i < specInfos.size(); specInfo_i++) { - if (stage==stage_t::ESS_VERTEX) // required - return false; - CRTP::template nullOptional(); - continue; + const auto& specInfo = specInfos[specInfo_i]; + const auto* shader = specInfo.shader.get(); + if (!shader) + { + if (stage == stage_t::ESS_RAYGEN) return false; + CRTP::template nullOptional(); + continue; + } + if (!descend(shader, { shader }, specInfo, 
stage, specInfo_i)) return false; } - if (!descend(shader,{shader}, specInfo, stage)) - return false; } return true; } @@ -1386,41 +1428,6 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_tgetContentHash(); return true; } -bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) -{ - const auto* asset = lookup.asset; - // - AssetVisitor> visitor = { - *this, - {asset,static_cast(patchOverride)->uniqueCopyGroupID}, - *lookup.patch - }; - if (!visitor()) - return false; - const auto& params = asset->getCachedCreationParams(); - hasher << params.requireFullSubgroups; - return true; -} -bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) -{ - const auto* asset = lookup.asset; - // - hasher << asset->getMissGroupCount(); - hasher << asset->getHitGroupCount(); - hasher << asset->getCallableGroupCount(); - AssetVisitor> visitor = { - *this, - {asset,static_cast(patchOverride)->uniqueCopyGroupID}, - *lookup.patch - }; - if (!visitor()) - return false; - const auto& params = asset->getCachedCreationParams(); - hasher << params.flags; - hasher << params.maxRecursionDepth; - hasher << params.dynamicStackSize; - return true; -} bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) { const auto* asset = lookup.asset; @@ -1609,6 +1616,113 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) +{ + const auto* asset = lookup.asset; + // + AssetVisitor> visitor = { + *this, + {asset,static_cast(patchOverride)->uniqueCopyGroupID}, + *lookup.patch + }; + if (!visitor()) + return false; + const auto& params = asset->getCachedCreationParams(); + hasher << params.requireFullSubgroups; + return true; +} +bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) +{ + const auto* asset = lookup.asset; + // + AssetVisitor> visitor = { + *this, + {asset,static_cast(patchOverride)->uniqueCopyGroupID}, + *lookup.patch + }; + if (!visitor()) + return false; + + const auto& params = 
asset->getCachedCreationParams(); + { + const auto& raster = params.rasterization; + if (!raster.rasterizerDiscard) + { + hasher << raster.viewportCount; + hasher << raster.samplesLog2; + hasher << raster.polygonMode; + //if (raster.polygonMode==E_POLYGON_MODE::EPM_FILL) // do wireframes and point draw with face culling? + { + hasher << raster.faceCullingMode; + hasher << raster.frontFaceIsCCW; + } + const auto& rpassParam = asset->getRenderpass()->getCreationParameters(); + const auto& depthStencilRef = rpassParam.subpasses[params.subpassIx].depthStencilAttachment.render; + if (depthStencilRef.used()) + { + const auto attFormat = rpassParam.depthStencilAttachments[depthStencilRef.attachmentIndex].format; + if (!isStencilOnlyFormat(attFormat)) + { + hasher << raster.depthCompareOp; + hasher << raster.depthWriteEnable; + if (raster.depthTestEnable()) + { + hasher << raster.depthClampEnable; + hasher << raster.depthBiasEnable; + hasher << raster.depthBoundsTestEnable; + } + } + if (raster.stencilTestEnable() && !isDepthOnlyFormat(attFormat)) + { + if ((raster.faceCullingMode & E_FACE_CULL_MODE::EFCM_FRONT_BIT) == 0) + hasher << raster.frontStencilOps; + if ((raster.faceCullingMode & E_FACE_CULL_MODE::EFCM_BACK_BIT) == 0) + hasher << raster.backStencilOps; + } + } + hasher << raster.alphaToCoverageEnable; + hasher << raster.alphaToOneEnable; + if (raster.samplesLog2) + { + hasher << raster.minSampleShadingUnorm; + hasher << (reinterpret_cast(raster.sampleMask) & ((0x1ull << raster.samplesLog2) - 1)); + } + } + for (const auto& blend : std::span(params.blend.blendParams)) + { + if (blend.blendEnabled()) + hasher.update(&blend, sizeof(blend)); + else + hasher << blend.colorWriteMask; + } + hasher << params.blend.logicOp; + } + hasher << params.subpassIx; + + //potentially more i forgot, come back to this + hasher << params.requireFullSubgroups; + return true; +} +bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) +{ + const auto* asset = 
lookup.asset; + // + hasher << asset->getMissGroupCount(); + hasher << asset->getHitGroupCount(); + hasher << asset->getCallableGroupCount(); + AssetVisitor> visitor = { + *this, + {asset,static_cast(patchOverride)->uniqueCopyGroupID}, + *lookup.patch + }; + if (!visitor()) + return false; + const auto& params = asset->getCachedCreationParams(); + hasher << params.flags; + hasher << params.maxRecursionDepth; + hasher << params.dynamicStackSize; + return true; +} bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) { const auto* asset = lookup.asset; @@ -1738,11 +1852,12 @@ void CAssetConverter::CHashCache::eraseStale(const IPatchOverride* patchOverride // naturally any pipeline depends on shaders and pipeline cache rehash.operator()(); rehash.operator()(); - rehash.operator()(); - rehash.template operator()(); // graphics pipeline needs a renderpass rehash.template operator()(); rehash.template operator()(); + rehash.operator()(); + rehash.template operator()(); + rehash.template operator()(); // rehash.template operator()(); rehash.template operator()(); } @@ -1889,6 +2004,65 @@ class GetDependantVisit : public GetDependantVisitBase +class GetDependantVisit : public GetDependantVisitBase +{ +public: + // using AssetType = ICPUGraphicsPipeline; + + inline auto& getSpecInfo(const IShader::E_SHADER_STAGE stage) + { + assert(hlsl::bitCount(stage) == 1); + return specInfo[hlsl::findLSB(stage)]; + } + + // ok to do non owning since some cache owns anyway + IGPUPipelineLayout* layout = nullptr; + // has to be public to allow for initializer list constructor + std::array::value*/sizeof(IShader::E_SHADER_STAGE) * 8> specInfo = {}; + // optionals (done this way because inheritance chain with templated class hides protected methods) + IGPURenderpass* renderpass = nullptr; + +protected: + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto 
depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + layout = depObj.get(); + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + getSpecInfo(stage) = { + .shader = depObj, + .entryPoint = inSpecInfo.entryPoint, // warning: its a `string_view` now! + .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, + .entries = inSpecInfo.entries + }; + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + renderpass = depObj.get(); + return true; + } +}; +template<> class GetDependantVisit : public GetDependantVisitBase { public: @@ -1933,63 +2107,148 @@ class GetDependantVisit : public GetDependantVisitBase -class GetDependantVisit : public GetDependantVisitBase +class GetDependantVisit : public GetDependantVisitBase { - public: -// using AssetType = ICPUGraphicsPipeline; +public: + // using AssetType = ICPUMeshPipeline; - inline auto& getSpecInfo(const IShader::E_SHADER_STAGE stage) - { - assert(hlsl::bitCount(stage)==1); - return specInfo[hlsl::findLSB(stage)]; - } + inline auto& getSpecInfo(const IShader::E_SHADER_STAGE stage) + { + //potentially incorrect, I need to dig into this + assert(hlsl::bitCount(stage) == 1); + return specInfo[hlsl::findLSB(stage)]; + } - // ok to do non owning since some cache owns anyway - IGPUPipelineLayout* layout = nullptr; - // has to be public to allow for initializer list constructor - std::array::value*/sizeof(IShader::E_SHADER_STAGE)*8> specInfo = {}; - // optionals (done this way because inheritance chain with templated class 
hides protected methods) - IGPURenderpass* renderpass = nullptr; + // ok to do non owning since some cache owns anyway + IGPUPipelineLayout* layout = nullptr; + // has to be public to allow for initializer list constructor + std::array::value*/sizeof(IShader::E_SHADER_STAGE) * 8> specInfo = {}; + // optionals (done this way because inheritance chain with templated class hides protected methods) + IGPURenderpass* renderpass = nullptr; - protected: - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch - ) +protected: + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + layout = depObj.get(); + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + getSpecInfo(stage) = { + .shader = depObj, + .entryPoint = inSpecInfo.entryPoint, // warning: its a `string_view` now! 
+ .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, + .entries = inSpecInfo.entries + }; + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + renderpass = depObj.get(); + return true; + } +}; +template<> +class GetDependantVisit : public GetDependantVisitBase +{ +public: + + inline void allocateShaders(size_t missCount, size_t hitGroupCount, size_t callableGroupCount) + { + misses.resize(missCount); + hitGroups.anyHits.resize(hitGroupCount); + hitGroups.closestHits.resize(hitGroupCount); + hitGroups.intersections.resize(hitGroupCount); + callables.resize(callableGroupCount); + } + + inline core::vector* getSpecInfoVector(const hlsl::ShaderStage stage) + { + switch (stage) { - auto depObj = getDependant(dep,soloPatch); - if (!depObj) - return false; - layout = depObj.get(); - return true; + // raygen is not stored as vector so we can't return it here. 
Use getSpecInfo + case hlsl::ShaderStage::ESS_MISS: + return &misses; + case hlsl::ShaderStage::ESS_ANY_HIT: + return &hitGroups.anyHits; + case hlsl::ShaderStage::ESS_CLOSEST_HIT: + return &hitGroups.closestHits; + case hlsl::ShaderStage::ESS_INTERSECTION: + return &hitGroups.intersections; + case hlsl::ShaderStage::ESS_CALLABLE: + return &callables; } - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage - ) + return nullptr; + } + + // ok to do non owning since some cache owns anyway + IGPUPipelineLayout* layout = nullptr; + ICPUPipelineBase::SShaderSpecInfo raygen; + core::vector misses; + ICPURayTracingPipeline::SHitGroupSpecInfos hitGroups; + core::vector callables; + +protected: + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + layout = depObj.get(); + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage, uint32_t groupIndex + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + if (stage == hlsl::ShaderStage::ESS_RAYGEN) { - auto depObj = getDependant(dep,soloPatch); - if (!depObj) - return false; - getSpecInfo(stage) = { + assert(groupIndex == 0); + raygen = ICPUPipelineBase::SShaderSpecInfo{ .shader = depObj, - .entryPoint = inSpecInfo.entryPoint, // warning: its a `string_view` now! 
+ .entryPoint = inSpecInfo.entryPoint, .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, - .entries = inSpecInfo.entries + .entries = inSpecInfo.entries, }; - return true; } - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch - ) + else { - auto depObj = getDependant(dep,soloPatch); - if (!depObj) - return false; - renderpass = depObj.get(); - return true; + auto& shaderGroups = *getSpecInfoVector(stage); + assert(groupIndex < shaderGroups.size()); + shaderGroups[groupIndex] = ICPUPipelineBase::SShaderSpecInfo{ + .shader = depObj, + .entryPoint = inSpecInfo.entryPoint, + .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, + .entries = inSpecInfo.entries, + }; } + return true; + } }; template<> class GetDependantVisit : public GetDependantVisitBase @@ -2188,89 +2447,6 @@ class GetDependantVisit : public GetDependantVisitBase -class GetDependantVisit : public GetDependantVisitBase -{ -public: - - inline void allocateShaders(size_t missCount, size_t hitGroupCount, size_t callableGroupCount) - { - misses.resize(missCount); - hitGroups.anyHits.resize(hitGroupCount); - hitGroups.closestHits.resize(hitGroupCount); - hitGroups.intersections.resize(hitGroupCount); - callables.resize(callableGroupCount); - } - - inline core::vector* getSpecInfoVector(const hlsl::ShaderStage stage) - { - switch (stage) - { - // raygen is not stored as vector so we can't return it here. 
Use getSpecInfo - case hlsl::ShaderStage::ESS_MISS: - return &misses; - case hlsl::ShaderStage::ESS_ANY_HIT: - return &hitGroups.anyHits; - case hlsl::ShaderStage::ESS_CLOSEST_HIT: - return &hitGroups.closestHits; - case hlsl::ShaderStage::ESS_INTERSECTION: - return &hitGroups.intersections; - case hlsl::ShaderStage::ESS_CALLABLE: - return &callables; - } - return nullptr; - } - - // ok to do non owning since some cache owns anyway - IGPUPipelineLayout* layout = nullptr; - ICPUPipelineBase::SShaderSpecInfo raygen; - core::vector misses; - ICPURayTracingPipeline::SHitGroupSpecInfos hitGroups; - core::vector callables; - -protected: - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch - ) - { - auto depObj = getDependant(dep, soloPatch); - if (!depObj) - return false; - layout = depObj.get(); - return true; - } - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage, uint32_t groupIndex - ) - { - auto depObj = getDependant(dep, soloPatch); - if (!depObj) - return false; - if (stage == hlsl::ShaderStage::ESS_RAYGEN) - { - assert(groupIndex == 0); - raygen = ICPUPipelineBase::SShaderSpecInfo{ - .shader = depObj, - .entryPoint = inSpecInfo.entryPoint, - .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, - .entries = inSpecInfo.entries, - }; - } else - { - auto& shaderGroups = *getSpecInfoVector(stage); - assert(groupIndex < shaderGroups.size()); - shaderGroups[groupIndex] = ICPUPipelineBase::SShaderSpecInfo{ - .shader = depObj, - .entryPoint = inSpecInfo.entryPoint, - .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, - .entries = inSpecInfo.entries, - }; - } - return true; - } -}; // Needed both for reservation and conversion class MetaDeviceMemoryAllocator final @@ -2902,11 
+3078,14 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult case ICPUPipelineLayout::AssetType: visit.template operator()(entry); break; + case ICPUGraphicsPipeline::AssetType: + visit.template operator()(entry); + break; case ICPUComputePipeline::AssetType: visit.template operator()(entry); break; - case ICPUGraphicsPipeline::AssetType: - visit.template operator()(entry); + case ICPUMeshPipeline::AssetType: + visit.template operator()(entry); break; case ICPURayTracingPipeline::AssetType: visit.template operator()(entry); @@ -3517,38 +3696,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } } } - if constexpr (std::is_same_v) - { - for (auto& entry : conversionRequests.contentHashToCanonical) - { - const ICPUComputePipeline* asset = entry.second.canonicalAsset; - // there is no patching possible for this asset - for (auto i=0ull; i> visitor = { - {visitBase}, - {asset,uniqueCopyGroupID}, - {} - }; - if (!visitor()) - continue; - // ILogicalDevice::createComputePipelines is rather aggressive on the spec constant validation, so we create one pipeline at a time - core::smart_refctd_ptr ppln; - IGPUPipelineBase::SShaderEntryMap entryMap; - { - // no derivatives, special flags, etc. 
- IGPUComputePipeline::SCreationParams params = {}; - params.layout = visitor.layout; - // while there are patches possible for shaders, the only patch which can happen here is changing a stage from UNKNOWN to COMPUTE - params.shader = IGPUPipelineBase::SShaderSpecInfo::create(visitor.getSpecInfo(), &entryMap); - device->createComputePipelines(inputs.pipelineCache,{¶ms,1},&ppln); - } - conversionRequests.assign(entry.first,entry.second.firstCopyIx,i,std::move(ppln)); - } - } - } if constexpr (std::is_same_v) { for (auto& entry : conversionRequests.contentHashToCanonical) @@ -3611,7 +3758,84 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } } } - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + for (auto& entry : conversionRequests.contentHashToCanonical) + { + const ICPUComputePipeline* asset = entry.second.canonicalAsset; + // there is no patching possible for this asset + for (auto i = 0ull; i < entry.second.copyCount; i++) + { + const auto outIx = i + entry.second.firstCopyIx; + const auto uniqueCopyGroupID = conversionRequests.gpuObjUniqueCopyGroupIDs[outIx]; + AssetVisitor> visitor = { + {visitBase}, + {asset,uniqueCopyGroupID}, + {} + }; + if (!visitor()) + continue; + // ILogicalDevice::createComputePipelines is rather aggressive on the spec constant validation, so we create one pipeline at a time + core::smart_refctd_ptr ppln; + IGPUPipelineBase::SShaderEntryMap entryMap; + { + // no derivatives, special flags, etc. 
+ IGPUComputePipeline::SCreationParams params = {}; + params.layout = visitor.layout; + // while there are patches possible for shaders, the only patch which can happen here is changing a stage from UNKNOWN to COMPUTE + params.shader = IGPUPipelineBase::SShaderSpecInfo::create(visitor.getSpecInfo(), &entryMap); + device->createComputePipelines(inputs.pipelineCache, { ¶ms,1 }, &ppln); + } + conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln)); + } + } + } + //seems like this should be a else if. begging the C++ standard for a constexpr switch + if constexpr (std::is_same_v) + { + for (auto& entry : conversionRequests.contentHashToCanonical) + { + const ICPUMeshPipeline* asset = entry.second.canonicalAsset; + // there is no patching possible for this asset + for (auto i = 0ull; i < entry.second.copyCount; i++) + { + const auto outIx = i + entry.second.firstCopyIx; + const auto uniqueCopyGroupID = conversionRequests.gpuObjUniqueCopyGroupIDs[outIx]; + AssetVisitor> visitor = { + {visitBase}, + {asset,uniqueCopyGroupID}, + {} + }; + if (!visitor()) + continue; + // ILogicalDevice::createComputePipelines is rather aggressive on the spec constant validation, so we create one pipeline at a time + core::smart_refctd_ptr ppln; + { + // no derivatives, special flags, etc. 
+ IGPUMeshPipeline::SCreationParams params = {}; + using SShaderEntryMap = IGPUPipelineBase::SShaderEntryMap; + SShaderEntryMap taskEntryMap; + SShaderEntryMap meshEntryMap; + SShaderEntryMap fragmentEntryMap; + bool depNotFound = false; + { + params.layout = visitor.layout; + params.renderpass = visitor.renderpass; + // while there are patches possible for shaders, the only patch which can happen here is changing a stage from UNKNOWN to match the slot here + using stage_t = hlsl::ShaderStage; + using GPUShaderSpecInfo = IGPUPipelineBase::SShaderSpecInfo; + params.taskShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_TASK), &taskEntryMap); + params.meshShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_MESH), &meshEntryMap); + params.fragmentShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_FRAGMENT), &fragmentEntryMap); + } + params.cached = asset->getCachedCreationParams(); + device->createMeshPipelines(inputs.pipelineCache, { ¶ms,1 }, &ppln); + conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln)); + } + } + } + } + if constexpr (std::is_same_v) { for (auto& entry : conversionRequests.contentHashToCanonical) { @@ -3652,7 +3876,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult core::vector specs; explicit GPUSpecEntryVec(std::span cpuSpecs) - : entryMaps(cpuSpecs.size()), specs(cpuSpecs.size()) + : entryMaps(cpuSpecs.size()), specs(cpuSpecs.size()) { for (auto spec_i = 0u; spec_i < cpuSpecs.size(); spec_i++) specs[spec_i] = GPUShaderSpecInfo::create(cpuSpecs[spec_i], &entryMaps[spec_i]); @@ -3907,10 +4131,11 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); - dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); + 
dedupCreateProp.template operator()(); + dedupCreateProp.template operator()(); + dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); // dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); @@ -3990,9 +4215,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult // pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); - pruneStaging.template operator()(); pruneStaging.template operator()(); + pruneStaging.template operator()(); pruneStaging.template operator()(); + pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); From 32f4d99be48f93034c6bf32d222e53186412b77c Mon Sep 17 00:00:00 2001 From: Corey Williams Date: Thu, 26 Feb 2026 14:12:54 -0600 Subject: [PATCH 3/3] more Rasterization abstraction tweaks --- include/nbl/asset/IGraphicsPipeline.h | 16 +- include/nbl/asset/IMeshPipeline.h | 22 +- include/nbl/asset/IRasterizationPipeline.h | 46 ++++ include/nbl/video/IGPUMeshPipeline.h | 2 +- src/nbl/ext/ImGui/ImGui.cpp | 10 +- src/nbl/video/CVulkanLogicalDevice.cpp | 284 ++++++++++++--------- 6 files changed, 234 insertions(+), 146 deletions(-) create mode 100644 include/nbl/asset/IRasterizationPipeline.h diff --git a/include/nbl/asset/IGraphicsPipeline.h b/include/nbl/asset/IGraphicsPipeline.h index 5b445afae5..596dec3bda 100644 --- a/include/nbl/asset/IGraphicsPipeline.h +++ b/include/nbl/asset/IGraphicsPipeline.h @@ -7,6 +7,8 @@ #include "nbl/asset/IPipeline.h" #include "nbl/asset/IRenderpass.h" +#include "nbl/asset/IRasterizationPipeline.h" + #include @@ -71,26 +73,21 @@ class IGraphicsPipelineBase : public virtual core::IReferenceCounted public: constexpr static inline uint8_t GRAPHICS_SHADER_STAGE_COUNT = 5u; - struct SCachedCreationParams final + struct SCachedCreationParams final : public IRasterizationPipelineBase::SCachedCreationParams { 
SVertexInputParams vertexInput = {}; SPrimitiveAssemblyParams primitiveAssembly = {}; - SRasterizationParams rasterization = {}; - SBlendParams blend = {}; - uint32_t subpassIx = 0u; }; }; template -class IGraphicsPipeline : public IPipeline, public IGraphicsPipelineBase +class IGraphicsPipeline : public IRasterizationPipeline, public IGraphicsPipelineBase { protected: using renderpass_t = RenderpassType; public: inline const SCachedCreationParams& getCachedCreationParams() const {return m_params;} - inline const renderpass_t* getRenderpass() const {return m_renderpass.get();} - static inline bool hasRequiredStages(const core::bitflag& stagePresence, E_PRIMITIVE_TOPOLOGY primitiveType) { @@ -110,12 +107,11 @@ class IGraphicsPipeline : public IPipeline, public IGraphics protected: explicit IGraphicsPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) : - IPipeline(core::smart_refctd_ptr(layout)), - m_params(cachedParams), m_renderpass(core::smart_refctd_ptr(renderpass)) + IRasterizationPipeline(layout, renderpass), + m_params(cachedParams) {} SCachedCreationParams m_params = {}; - core::smart_refctd_ptr m_renderpass = nullptr; }; } diff --git a/include/nbl/asset/IMeshPipeline.h b/include/nbl/asset/IMeshPipeline.h index 8fa850225d..6e82298f6b 100644 --- a/include/nbl/asset/IMeshPipeline.h +++ b/include/nbl/asset/IMeshPipeline.h @@ -5,27 +5,25 @@ #include "nbl/asset/RasterizationStates.h" #include "nbl/asset/IPipeline.h" +#include "nbl/asset/IRasterizationPipeline.h" + namespace nbl::asset { class IMeshPipelineBase : public virtual core::IReferenceCounted { public: constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u; - struct SCachedCreationParams final { - SRasterizationParams rasterization = {}; - SBlendParams blend = {}; - uint32_t subpassIx = 0u; + struct SCachedCreationParams final : public IRasterizationPipelineBase::SCachedCreationParams + { uint8_t requireFullSubgroups = false; }; - }; template - 
class IMeshPipeline : public IPipeline, public IMeshPipelineBase { + class IMeshPipeline : public IRasterizationPipeline, public IMeshPipelineBase { protected: using renderpass_t = RenderpassType; public: inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } - inline const renderpass_t* getRenderpass() const {return m_renderpass.get();} static inline bool hasRequiredStages(const core::bitflag& stagePresence) { @@ -33,14 +31,12 @@ namespace nbl::asset { } protected: - explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) : - IPipeline(core::smart_refctd_ptr(layout)), - m_params(cachedParams), m_renderpass(core::smart_refctd_ptr(renderpass)) - { - } + explicit IMeshPipeline(PipelineLayoutType* layout, const IMeshPipelineBase::SCachedCreationParams& cachedParams, renderpass_t* renderpass) : + IRasterizationPipeline(layout, renderpass), + m_params(cachedParams) + {} SCachedCreationParams m_params = {}; - core::smart_refctd_ptr m_renderpass = nullptr; }; } diff --git a/include/nbl/asset/IRasterizationPipeline.h b/include/nbl/asset/IRasterizationPipeline.h new file mode 100644 index 0000000000..196554e760 --- /dev/null +++ b/include/nbl/asset/IRasterizationPipeline.h @@ -0,0 +1,46 @@ +#ifndef _NBL_ASSET_I_RASTERIZATION_PIPELINE_H_INCLUDED_ +#define _NBL_ASSET_I_RASTERIZATION_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IShader.h" +#include "nbl/asset/RasterizationStates.h" +#include "nbl/asset/IPipeline.h" +#include "nbl/asset/IRenderpass.h" + + +//the primary goal is to abstract between mesh and traditional graphics (vertex) pipelines +//so that any pipeline that can be bound to VK_PIPELINE_BIND_POINT_GRAPHICS can be returned polymorphically +//the secondary goal is to not change IGraphicsPipeline as little as possible + +namespace nbl::asset { + +class IRasterizationPipelineBase +{ + //IRasterizationPipelineBase isn't inherited from, only SCachedCreationParams + public: 
+ struct SCachedCreationParams { + SRasterizationParams rasterization = {}; + SBlendParams blend = {}; + uint32_t subpassIx = 0u; + }; +}; + +template +class IRasterizationPipeline : public IPipeline +{ +protected: + using renderpass_t = RenderpassType; + +public: + inline const renderpass_t* getRenderpass() const { return m_renderpass.get(); } +protected: + explicit IRasterizationPipeline(PipelineLayoutType* layout, renderpass_t* renderpass) : + IPipeline(core::smart_refctd_ptr(layout)), + m_renderpass(core::smart_refctd_ptr(renderpass)) + {} + + core::smart_refctd_ptr m_renderpass = nullptr; +}; +} + + +#endif \ No newline at end of file diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h index a7770ee8d3..a909a61de1 100644 --- a/include/nbl/video/IGPUMeshPipeline.h +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -69,7 +69,7 @@ namespace nbl::video if (!hasRequiredStages(stagePresence)) return {}; - //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? 
+ if (!meshShader.shader) return {}; //mesh shader is required in mesh pipelines, fragment and task are optional return retval; } diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index f477e96cdf..de241e702f 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -350,7 +350,15 @@ core::smart_refctd_ptr UI::createPipeline(SCreation param.fragmentShader = { .shader = shaders.fragment.get(), .entryPoint = "PSMain" }; param.layout = pipelineLayout.get(); param.renderpass = creationParams.renderpass.get(); - param.cached = { .vertexInput = vertexInputParams, .primitiveAssembly = primitiveAssemblyParams, .rasterization = rasterizationParams, .blend = blendParams, .subpassIx = creationParams.subpassIx }; + param.cached = { + { //cant do designated, "no non-static member named 'nbl'" + .rasterization = rasterizationParams, + .blend = blendParams, + .subpassIx = creationParams.subpassIx, + }, + /*.vertexInput =*/ vertexInputParams, //cant mix designated and non-designated + /*.primitiveAssembly = */ primitiveAssemblyParams + }; }; if (!creationParams.utilities->getLogicalDevice()->createGraphicsPipelines(creationParams.pipelineCache.get(), params, &pipeline)) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 3df77e5634..4230221e67 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1202,36 +1202,6 @@ void CVulkanLogicalDevice::createComputePipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } -void PopulateViewport(VkPipelineViewportStateCreateInfo& outViewport, nbl::asset::SRasterizationParams const& raster) { - outViewport.viewportCount = raster.viewportCount; - // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used - outViewport.scissorCount = raster.viewportCount; -} - - -void PopulateRaster(VkPipelineRasterizationStateCreateInfo& 
outRaster, nbl::asset::SRasterizationParams const& raster) { - outRaster.depthClampEnable = raster.depthClampEnable; - outRaster.rasterizerDiscardEnable = raster.rasterizerDiscard; - outRaster.polygonMode = static_cast(raster.polygonMode); - outRaster.cullMode = static_cast(raster.faceCullingMode); - outRaster.frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; - outRaster.depthBiasEnable = raster.depthBiasEnable; -} - -void PopulateMultisample(VkPipelineMultisampleStateCreateInfo& outMultisample, nbl::asset::SRasterizationParams const& raster) { - outMultisample.rasterizationSamples = static_cast(0x1 << raster.samplesLog2); - if (raster.minSampleShadingUnorm > 0) { - outMultisample.sampleShadingEnable = true; - outMultisample.minSampleShading = float(raster.minSampleShadingUnorm) / 255.f; - } - else { - outMultisample.sampleShadingEnable = false; - outMultisample.minSampleShading = 0.f; - } - outMultisample.pSampleMask = raster.sampleMask; - outMultisample.alphaToCoverageEnable = raster.alphaToCoverageEnable; - outMultisample.alphaToOneEnable = raster.alphaToOneEnable; -} VkStencilOpState getVkStencilOpStateFrom(const asset::SStencilOpParams& params) { return { .failOp = static_cast(params.failOp), @@ -1241,94 +1211,165 @@ VkStencilOpState getVkStencilOpStateFrom(const asset::SStencilOpParams& params) }; } -void PopulateDepthStencil(VkPipelineDepthStencilStateCreateInfo& outDepthStencil, nbl::asset::SRasterizationParams const& raster) { - outDepthStencil.depthTestEnable = raster.depthTestEnable(); - outDepthStencil.depthWriteEnable = raster.depthWriteEnable; - outDepthStencil.depthCompareOp = static_cast(raster.depthCompareOp); - outDepthStencil.depthBoundsTestEnable = raster.depthBoundsTestEnable; - outDepthStencil.stencilTestEnable = raster.stencilTestEnable(); - outDepthStencil.front = getVkStencilOpStateFrom(raster.frontStencilOps); - outDepthStencil.back = getVkStencilOpStateFrom(raster.backStencilOps); -} -void 
PopulateColorBlend( - VkPipelineColorBlendStateCreateInfo& outColorBlend, - VkPipelineColorBlendAttachmentState*& outColorBlendAttachmentState, - nbl::asset::SBlendParams const& blend, - nbl::asset::IRenderpass::SCreationParams::SSubpassDescription const& subpass -) { - //outColorBlend->flags no attachment order access yet - outColorBlend.logicOpEnable = blend.logicOp != asset::ELO_NO_OP; - outColorBlend.logicOp = getVkLogicOpFromLogicOp(blend.logicOp); - outColorBlend.pAttachments = outColorBlendAttachmentState; - for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) { - if (subpass.colorAttachments[i].render.used()) { - const auto& params = blend.blendParams[i]; - outColorBlendAttachmentState->blendEnable = params.blendEnabled(); - outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); - outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); - outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); - outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); - outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); - outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); - outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); - outColorBlendAttachmentState++; - //^that pointer iterator is how we ensure the attachments or consecutive - } - } - outColorBlend.attachmentCount = std::distance(outColorBlend.pAttachments, outColorBlendAttachmentState); -} template -void PopulateRasterizationPipelineCommonData( - const std::span createInfos, - core::vector& vk_createInfos, - - core::vector& vk_viewportStates, - 
core::vector& vk_rasterizationStates, - core::vector& vk_multisampleStates, - core::vector& vk_depthStencilStates, - core::vector& vk_colorBlendStates, - core::vector& vk_colorBlendAttachmentStates, - - core::vector& vk_dynamicStates, - const VkPipelineDynamicStateCreateInfo& vk_dynamicStateCreateInfo -) { - //the main concern is lifetime, so don't want to construct, move, or copy anything in here +struct RasterizationPipelineDataPopulator{ + const std::span createInfos; + const std::span vk_createInfos; + + const std::span vk_viewportStates; + const std::span vk_rasterizationStates; + const std::span vk_multisampleStates; + const std::span vk_depthStencilStates; + const std::span vk_colorBlendStates; + + std::size_t color_blend_attachment_iterator = 0; + const std::span vk_colorBlendAttachmentStates; + + const std::span vk_dynamicStates; + const VkPipelineDynamicStateCreateInfo& vk_dynamicStateCreateInfo; + + + void PopulateViewport(std::size_t info_index) { + auto& info = createInfos[info_index]; + const auto& raster = info.cached.rasterization; + vk_viewportStates[info_index].viewportCount = raster.viewportCount; + // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used + vk_viewportStates[info_index].scissorCount = raster.viewportCount; + } - auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); //the pointer iterator is used + void PopulateRaster(std::size_t info_index) { + auto& info = createInfos[info_index]; + const auto& raster = info.cached.rasterization; + VkPipelineRasterizationStateCreateInfo& outRaster = vk_rasterizationStates[info_index]; + + outRaster.depthClampEnable = raster.depthClampEnable; + outRaster.rasterizerDiscardEnable = raster.rasterizerDiscard; + outRaster.polygonMode = static_cast(raster.polygonMode); + outRaster.cullMode = static_cast(raster.faceCullingMode); + outRaster.frontFace = raster.frontFaceIsCCW ? 
VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE; + outRaster.depthBiasEnable = raster.depthBiasEnable; + } + + void PopulateMultisample(std::size_t info_index) { + auto& info = createInfos[info_index]; + const auto& raster = info.cached.rasterization; + auto& outMultisample = vk_multisampleStates[info_index]; + outMultisample.rasterizationSamples = static_cast(0x1 << raster.samplesLog2); + if (raster.minSampleShadingUnorm > 0) { + outMultisample.sampleShadingEnable = true; + outMultisample.minSampleShading = float(raster.minSampleShadingUnorm) / 255.f; + } + else { + outMultisample.sampleShadingEnable = false; + outMultisample.minSampleShading = 0.f; + } + outMultisample.pSampleMask = raster.sampleMask; + outMultisample.alphaToCoverageEnable = raster.alphaToCoverageEnable; + outMultisample.alphaToOneEnable = raster.alphaToOneEnable; + } - for (uint32_t i = 0; i < createInfos.size(); i++) { //whats the maximum number of pipelines that can be created at once? uint32_t to be safe - auto& info = createInfos[i]; - const auto& blend = info.cached.blend; + void PopulateDepthStencil(std::size_t info_index) { + auto& info = createInfos[info_index]; const auto& raster = info.cached.rasterization; + auto& outDepthStencil = vk_depthStencilStates[info_index]; + + outDepthStencil.depthTestEnable = raster.depthTestEnable(); + outDepthStencil.depthWriteEnable = raster.depthWriteEnable; + outDepthStencil.depthCompareOp = static_cast(raster.depthCompareOp); + outDepthStencil.depthBoundsTestEnable = raster.depthBoundsTestEnable; + outDepthStencil.stencilTestEnable = raster.stencilTestEnable(); + outDepthStencil.front = getVkStencilOpStateFrom(raster.frontStencilOps); + outDepthStencil.back = getVkStencilOpStateFrom(raster.backStencilOps); + } + + void PopulateColorBlend(std::size_t info_index) { + auto& info = createInfos[info_index]; + auto const& blend = info.cached.blend; const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; + 
auto& outColorBlend = vk_colorBlendStates[info_index]; + + //outColorBlend->flags no attachment order access yet + outColorBlend.logicOpEnable = blend.logicOp != asset::ELO_NO_OP; + outColorBlend.logicOp = getVkLogicOpFromLogicOp(blend.logicOp); + outColorBlend.pAttachments = &vk_colorBlendAttachmentStates[color_blend_attachment_iterator]; + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) { + if (subpass.colorAttachments[i].render.used()) { + const auto& params = blend.blendParams[i]; + auto& outColorBlendAttachmentState = vk_colorBlendAttachmentStates[color_blend_attachment_iterator]; + outColorBlendAttachmentState.blendEnable = params.blendEnabled(); + outColorBlendAttachmentState.srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); + outColorBlendAttachmentState.dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); + outColorBlendAttachmentState.colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); + outColorBlendAttachmentState.srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); + outColorBlendAttachmentState.dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); + outColorBlendAttachmentState.alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); + outColorBlendAttachmentState.colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); + color_blend_attachment_iterator++; + //^that pointer iterator is how we ensure the attachments or consecutive + } + } + outColorBlend.attachmentCount = std::distance(outColorBlend.pAttachments, &vk_colorBlendAttachmentStates[color_blend_attachment_iterator]); + } - initPipelineCreateInfo(&vk_createInfos[i], info); - - PopulateViewport(vk_viewportStates[i], raster); - PopulateRaster(vk_rasterizationStates[i], raster); - PopulateMultisample(vk_multisampleStates[i], 
raster); - PopulateDepthStencil(vk_depthStencilStates[i], raster); - PopulateColorBlend(vk_colorBlendStates[i], outColorBlendAttachmentState, blend, subpass); - //PopulateDynamicState(dynState, ?) - - - vk_createInfos[i].pViewportState = &vk_viewportStates[i]; - vk_createInfos[i].pRasterizationState = &vk_rasterizationStates[i]; - vk_createInfos[i].pMultisampleState = &vk_multisampleStates[i]; - vk_createInfos[i].pDepthStencilState = &vk_depthStencilStates[i]; - vk_createInfos[i].pColorBlendState = &vk_colorBlendStates[i]; - vk_createInfos[i].pDynamicState = &vk_dynamicStateCreateInfo; - vk_createInfos[i].renderPass = static_cast(info.renderpass)->getInternalObject(); - vk_createInfos[i].subpass = info.cached.subpassIx; - //handle - //index - //layout? - // ^ handled in initPipelineCreateInfo + void Populate(){ + for (uint32_t i = 0; i < createInfos.size(); i++) { //whats the maximum number of pipelines that can be created at once? uint32_t to be safe + auto& info = createInfos[i]; + + initPipelineCreateInfo(&vk_createInfos[i], info); + + PopulateViewport(i); + PopulateRaster(i); + PopulateMultisample(i); + PopulateDepthStencil(i); + PopulateColorBlend(i); + //PopulateDynamicState(dynState, ?) + + + vk_createInfos[i].pViewportState = &vk_viewportStates[i]; + vk_createInfos[i].pRasterizationState = &vk_rasterizationStates[i]; + vk_createInfos[i].pMultisampleState = &vk_multisampleStates[i]; + vk_createInfos[i].pDepthStencilState = &vk_depthStencilStates[i]; + vk_createInfos[i].pColorBlendState = &vk_colorBlendStates[i]; + vk_createInfos[i].pDynamicState = &vk_dynamicStateCreateInfo; + vk_createInfos[i].renderPass = static_cast(info.renderpass)->getInternalObject(); + vk_createInfos[i].subpass = info.cached.subpassIx; + //handle + //index + //layout? 
+ // ^ handled in initPipelineCreateInfo + } } -} + explicit RasterizationPipelineDataPopulator( + const std::span createInfos, + const std::span vk_createInfos, + + const std::span vk_viewportStates, + const std::span vk_rasterizationStates, + const std::span vk_multisampleStates, + const std::span vk_depthStencilStates, + const std::span vk_colorBlendStates, + const std::span vk_colorBlendAttachmentStates, + + const std::span vk_dynamicStates, + const VkPipelineDynamicStateCreateInfo& vk_dynamicStateCreateInfo + ) + : createInfos{createInfos}, + vk_createInfos{vk_createInfos}, + vk_viewportStates{vk_viewportStates}, + vk_rasterizationStates{vk_rasterizationStates}, + vk_multisampleStates{vk_multisampleStates}, + vk_depthStencilStates{vk_depthStencilStates}, + vk_colorBlendStates{vk_colorBlendStates}, + vk_colorBlendAttachmentStates{vk_colorBlendAttachmentStates}, + vk_dynamicStates{ vk_dynamicStates }, + vk_dynamicStateCreateInfo{ vk_dynamicStateCreateInfo } + { + Populate(); + } +}; core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures const& features) { core::vector ret = { @@ -1391,9 +1432,10 @@ void CVulkanLogicalDevice::createMeshPipelines_impl( .pViewports = nullptr, .scissorCount = 0, .pScissors = nullptr, - }); + } + ); - PopulateRasterizationPipelineCommonData( + RasterizationPipelineDataPopulator( createInfos, vk_createInfos, vk_viewportStates, @@ -1443,15 +1485,16 @@ void CVulkanLogicalDevice::createMeshPipelines_impl( { if (spec.shader) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, - shaderStage, - false, - outShaderModule, - outEntryPoints, - outRequiredSubgroupSize, - outSpecInfo, - outSpecMapEntry, - outSpecData + *(outShaderStage++) = getVkShaderStageCreateInfoFrom( + spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData ); outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); } @@ -1509,7 +1552,6 @@ 
void CVulkanLogicalDevice::createGraphicsPipelines_impl( // - Fragment Shading Rate State Creation Info // - Piepline Robustness - //maximum cleanliness, I create a struct that holds this for mesh and graphics? core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); @@ -1539,7 +1581,7 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( } ); - PopulateRasterizationPipelineCommonData( + RasterizationPipelineDataPopulator( createInfos, vk_createInfos, vk_viewportStates,