diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 3dda5a1a87..33d7f51258 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 3dda5a1a87b62fdf3baf4680edc41c00e85a7a22 +Subproject commit 33d7f512583b8de44d1b6384aa1cf482f92e53e9 diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools index 4b6f7101c1..761e7bf273 160000 --- a/3rdparty/Vulkan-Tools +++ b/3rdparty/Vulkan-Tools @@ -1 +1 @@ -Subproject commit 4b6f7101c15e09a8931f2f81c97146d0dfe68bc5 +Subproject commit 761e7bf2736f3ad326fdfc1b3c1543f4e669fd5c diff --git a/3rdparty/openexr b/3rdparty/openexr index aaf5f750d7..c8a74d9ac9 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit aaf5f750d7a5fd117d79932d209f0e9816cbff1f +Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd diff --git a/CMakeLists.txt b/CMakeLists.txt index c21da262c0..161026137b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" ON) option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) +option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/examples_tests b/examples_tests index 85d44671d1..b712d1e49c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 85d44671d137669ce51d973c8cf76b38dad5a12a +Subproject commit b712d1e49cfc43a0ab3e82d4b6ef689f0e0f0edc diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl index cc22595444..ab7a87c7dd 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl @@ -69,7 +69,7 @@ NBL_CONCEPT_END( #include template -NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor && GenericWriteAccessor; +NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor && GenericWriteAccessor; } } diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl new file mode 100644 index 0000000000..9a27e11df6 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl @@ -0,0 +1,193 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template && + hierarchical_image::LuminanceReadAccessor + ) +struct LuminanceMapSampler +{ + using scalar_type = ScalarT; + using vector2_type = vector; + using vector4_type = vector; + + LuminanceAccessorT _map; + uint32_t2 _mapSize; + uint32_t2 _lastWarpPixel; + bool _aspect2x1; + + static LuminanceMapSampler create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize) + { + LuminanceMapSampler result; + result._map = lumaMap; + result._mapSize = mapSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); + result._aspect2x1 = aspect2x1; + return result; + } + + static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi) + { + // numerical resilience against IEEE754 + scalar_type dummy = scalar_type(0); + PartitionRandVariable partition; + partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); + return partition(xi, dummy); + } + + vector2_type binarySearch(const uint32_t2 coord) + { + // We use _lastWarpPixel here for corner sampling + float32_t2 xi = float32_t2(coord)/ _lastWarpPixel; + uint32_t2 p = uint32_t2(0, 0); + const uint32_t mip2x1 = findMSB(_mapSize.y); /* scalar: used below as the mip `level` argument and as the loop start */ + + if (_aspect2x1) { + // do one split in the X axis first cause penultimate full mip would have been 2x1 + p.x = choseSecond(_map.texelFetch(uint32_t2(0, 0), mip2x1), _map.texelFetch(uint32_t2(1, 0), mip2x1), xi.x) ? 
1 : 0; + } + + for (int i = mip2x1 - 1; i >= 0; i--) + { + p <<= 1; + const vector4_type values = _map.texelGather(p, i); + scalar_type wx_0, wx_1; + { + const scalar_type wy_0 = values[3] + values[2]; + const scalar_type wy_1 = values[1] + values[0]; + if (choseSecond(wy_0, wy_1, xi.y)) + { + p.y |= 1; + wx_0 = values[0]; + wx_1 = values[1]; + } + else + { + wx_0 = values[3]; + wx_1 = values[2]; + } + } + if (choseSecond(wx_0, wx_1, xi.x)) + p.x |= 1; + } + + + // If we don`t add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian. + const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize); + return directionUV; + } + + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC + { + const vector2_type dir0 = binarySearch(sampleCoord + vector2_type(0, 1)); + const vector2_type dir1 = binarySearch(sampleCoord + vector2_type(1, 1)); + const vector2_type dir2 = binarySearch(sampleCoord + vector2_type(1, 0)); + const vector2_type dir3 = binarySearch(sampleCoord); + return matrix( + dir0, + dir1, + dir2, + dir3 + ); + } +}; + +template && + concepts::accessors::GenericReadAccessor && + hierarchical_image::HierarchicalSampler && + concepts::Warp) +struct HierarchicalImage +{ + using scalar_type = ScalarT; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + LuminanceAccessorT _lumaMap; + HierarchicalSamplerT _warpMap; + uint32_t2 _warpSize; + uint32_t2 _lastWarpPixel; + scalar_type _rcpAvgLuma; + + static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) + { + HierarchicalImage result; + result._lumaMap = lumaMap; + result._warpMap = warpMap; + 
result._warpSize = warpSize; + result._lastWarpPixel = warpSize - uint32_t2(1, 1); + result._rcpAvgLuma = ScalarT(1.0) / avgLuma; + return result; + } + + vector2_type inverseWarp_and_deferredPdf(NBL_REF_ARG(scalar_type) pdf, vector3_type direction) NBL_CONST_MEMBER_FUNC + { + vector2_type envmapUv = PostWarpT::inverseWarp(direction); + scalar_type luma; + _lumaMap.get(envmapUv, luma); + pdf = (luma * _rcpAvgLuma) * PostWarpT::backwardDensity(direction); + return envmapUv; + } + + scalar_type deferredPdf(vector3_type direction) NBL_CONST_MEMBER_FUNC + { + vector2_type envmapUv = PostWarpT::inverseWarp(direction); + scalar_type luma; + _lumaMap.get(envmapUv, luma); + return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); + } + + vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(vector2_type) uv, vector2_type xi) NBL_CONST_MEMBER_FUNC + { + const vector2_type texelCoord = xi * float32_t2(_lastWarpPixel); + + matrix uvs = _warpMap.sampleUvs(uint32_t2(texelCoord)); + + const vector2_type interpolant = frac(texelCoord); + + const vector2_type xDiffs[] = { + uvs[2] - uvs[3], + uvs[1] - uvs[0] + }; + const vector2_type yVals[] = { + xDiffs[0] * interpolant.x + uvs[3], + xDiffs[1] * interpolant.x + uvs[0] + }; + const vector2_type yDiff = yVals[1] - yVals[0]; + uv = yDiff * interpolant.y + yVals[0]; + + const WarpResult warpResult = PostWarpT::warp(uv); + + const scalar_type detInterpolJacobian = determinant(matrix( + lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx + yDiff // second column dFdy + )) * _lastWarpPixel.x * _lastWarpPixel.y; + + pdf = abs(warpResult.density / detInterpolJacobian); + + return warpResult.dst; + } +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl new file mode 100644 index 0000000000..304293b93e --- /dev/null +++ 
b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl @@ -0,0 +1,62 @@ +#ifndef _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_ /* same macro as the #ifndef, otherwise the guard never engages */ + +#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ +// declare concept +#define NBL_CONCEPT_NAME LuminanceReadAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,U) +#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2) +#define NBL_CONCEPT_PARAM_2 (level,uint32_t) +// start concept +NBL_CONCEPT_BEGIN(3) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template texelFetch(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template texelGather(coord,level)) , ::nbl::hlsl::is_same_v, vector)) +); +#undef level +#undef coord +#undef a +#include + +// sampleUvs return 4 UVs in a square to calculate the jacobian matrix +// declare concept +#define NBL_CONCEPT_NAME HierarchicalSampler +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT) +#define NBL_CONCEPT_PARAM_1 (coord,vector) +// start concept +NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.template sampleUvs(coord)) , ::nbl::hlsl::is_same_v, 
matrix)) +); +#undef sampler +#undef coord +#include + +} +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl new file mode 100644 index 0000000000..2f8ad4b019 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl @@ -0,0 +1,26 @@ +#ifndef _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_ +#define _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace hierarchical_image +{ + +struct SLumaGenPushConstants +{ + float32_t3 lumaRGBCoefficients; + uint32_t2 lumaMapResolution; +}; + +} +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl new file mode 100644 index 0000000000..f9ff6299b6 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl @@ -0,0 +1,25 @@ +#include "common.hlsl" + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::sampling::hierarchical_image; + +[[vk::push_constant]] SLumaGenPushConstants pc; + +[[vk::binding(0, 0)]] Texture2D envMap; +[[vk::binding(1, 0)]] RWTexture2D outImage; + +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + if (all(threadID.xy < pc.lumaMapResolution)) /* compare only the 2D part: lumaMapResolution is a uint32_t2 */ + { + + const float uv_y = (float(threadID.y) + float(0.5f)) / pc.lumaMapResolution.y; + const float32_t3 envMapSample = envMap.Load(uint32_t3(threadID.xy, 0)); /* integer texel coordinate + mip 0 */ + const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi * uv_y); + + outImage[threadID.xy] = luma; + } +} diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl new 
file mode 100644 index 0000000000..8c2b2c9bc3 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl @@ -0,0 +1,48 @@ +#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" + +[[vk::binding(0, 0)]] Texture2D lumaMap; + +[[vk::binding(1, 0)]] RWTexture2D outImage; + +using namespace nbl; +using namespace nbl::hlsl; +using namespace nbl::hlsl::sampling; + +struct LuminanceAccessor +{ + float32_t texelFetch(uint32_t2 coord, uint32_t level) + { + return lumaMap.Load(uint32_t3(coord, level)); + } + + float32_t4 texelGather(uint32_t2 coord, uint32_t level) + { + return float32_t4( + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + + } +}; + +[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + LuminanceAccessor luminanceAccessor; + uint32_t lumaMapWidth, lumaMapHeight; + + lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight); + + using LuminanceSampler = LuminanceMapSampler; + + LuminanceSampler luminanceSampler = + LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, uint32_t2(lumaMapWidth, lumaMapHeight)); + + uint32_t2 pixelCoord = threadID.xy; + + outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord); + +} diff --git a/include/nbl/builtin/hlsl/sampling/warp.hlsl b/include/nbl/builtin/hlsl/sampling/warp.hlsl new file mode 100644 index 0000000000..37c1800f51 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/warp.hlsl @@ -0,0 +1,55 @@ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_CONCEPTS_WARP_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct WarpResult +{ + CodomainT dst; + DensityT density; +}; 
+} + +namespace concepts +{ + +// declare concept +#define NBL_CONCEPT_NAME Warp +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (U) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (warper,U) +#define NBL_CONCEPT_PARAM_1 (xi,typename U::domain_type) +#define NBL_CONCEPT_PARAM_2 (dst,typename U::codomain_type) +// start concept +NBL_CONCEPT_BEGIN(3) +#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define xi NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(U::domain_type)) + ((NBL_CONCEPT_REQ_TYPE)(U::codomain_type)) + ((NBL_CONCEPT_REQ_TYPE)(U::density_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(xi)) , ::nbl::hlsl::is_same_v, sampling::WarpResult)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(xi)) , ::nbl::hlsl::is_same_v, typename U::density_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, typename U::density_type)) +); +#undef dst +#undef xi +#undef warper +#include + +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl new file mode 100644 index 0000000000..6094befe45 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/warps/spherical.hlsl @@ -0,0 +1,80 @@ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_WARP_SPHERICAL_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ +namespace warp +{ + +template +struct Spherical +{ + using density_type = T; + using domain_type = vector; + using codomain_type = vector; + + template ) + static WarpResult warp(const DomainT uv) + { + codomain_type dir; + dir.x = cos(uv.x * density_type(2) * numbers::pi); + dir.z = sqrt(density_type(1) - (dir.x * dir.x)); + if 
(uv.x > density_type(0.5)) + dir.z = -dir.z; + const density_type theta = uv.y * numbers::pi; + const density_type cosTheta = cos(theta); + const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta)); + dir.xz *= sinTheta; + dir.y = cosTheta; + + WarpResult warpResult; + warpResult.dst = dir; + warpResult.density = density_type(1) / (density_type(2) * sinTheta * numbers::pi * numbers::pi); + + return warpResult; + } + + template ) + static domain_type inverseWarp(const CodomainT v) + { + const density_type phi = atan2(v.z, v.x); + const density_type theta = acos(v.y); + density_type uv_x = phi * density_type(0.5) * numbers::inv_pi; + if (uv_x < density_type(0)) + uv_x += density_type(1); + density_type uv_y = theta * numbers::inv_pi; + return domain_type(uv_x, uv_y); + } + + + template ) + static density_type forwardDensity(const DomainT uv) + { + const density_type theta = uv.y * numbers::pi; + return density_type(1) / (sin(theta) * density_type(2) * numbers::pi * numbers::pi); + + } + + template ) + static density_type backwardDensity(const CodomainT dst) + { + const density_type cosTheta = dst.y; + const density_type sinTheta = sqrt(density_type(1) - (cosTheta * cosTheta)); + return density_type(1) / (sinTheta * density_type(2) * numbers::pi * numbers::pi); + } +}; + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 22c93ce193..aa395ad524 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -205,7 +205,7 @@ struct SArithmeticConfiguration #undef DEFINE_ASSIGN } - std::string getConfigTemplateStructString() + std::string getConfigTemplateStructString() NBL_CONST_MEMBER_FUNC { std::ostringstream os; os << "nbl::hlsl::workgroup2::ArithmeticConfiguration<" << WorkgroupSizeLog2 << "," << SubgroupSizeLog2 << "," << 
ItemsPerInvocation_0 << ">;"; diff --git a/include/nbl/core/sampling/EnvmapSampler.h b/include/nbl/core/sampling/EnvmapSampler.h new file mode 100644 index 0000000000..fbd2b8abd0 --- /dev/null +++ b/include/nbl/core/sampling/EnvmapSampler.h @@ -0,0 +1,148 @@ +#ifndef _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ +#define _NBL_CORE_ENVMAP_SAMPLER_INCLUDED_ + +#include "nbl/video/declarations.h" + +namespace nbl::core +{ + +class NBL_API2 EnvmapSampler final : public core::IReferenceCounted +{ + public: + + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + + struct SCachedCreationParameters + { + core::smart_refctd_ptr utilities; + uint32_t genLumaMapWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension; + uint32_t genWarpMapWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension; + }; + + struct SCreationParameters : public SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; + core::smart_refctd_ptr envMap = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(envMap), "Invalid `creationParams.envMap` is nullptr!"), + }); + + system::logger_opt_ptr logger = utilities ? system::logger_opt_ptr(utilities->getLogger()) : system::logger_opt_ptr(nullptr); /* null-safe: `utilities` is itself one of the pointers validated below */ + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); + + return true; + } + + }; + + static core::smart_refctd_ptr create(SCreationParameters&& params); + + static core::smart_refctd_ptr createGenLumaPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createGenWarpPipelineLayout(video::ILogicalDevice* device); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); + + static core::smart_refctd_ptr createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout); + + static core::smart_refctd_ptr createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, std::string_view debugName = ""); + + static core::smart_refctd_ptr createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, std::string_view debugName = ""); + + void computeWarpMap(video::IQueue* queue); + + // use this to synchronize warp map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getWarpMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + // use this to synchronize luma map after computeWarpMap call + nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT oldLayout); + + inline core::smart_refctd_ptr getLumaMapView() const + { + return m_lumaMap; + } + + inline core::smart_refctd_ptr getWarpMapView() const + { + return m_warpMap; + } + + inline hlsl::float32_t getAvgLuma() const + { + return m_avgLuma; + } + + protected: + struct ConstructorParams + { + SCachedCreationParameters creationParams; + hlsl::uint32_t2 lumaWorkgroupCount; + hlsl::uint32_t2 warpWorkgroupCount; + core::smart_refctd_ptr lumaMap; + core::smart_refctd_ptr warpMap; + core::smart_refctd_ptr genLumaPipeline; + 
core::smart_refctd_ptr genLumaDescriptorSet; + core::smart_refctd_ptr genWarpPipeline; + core::smart_refctd_ptr genWarpDescriptorSet; + }; + + explicit EnvmapSampler(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_lumaWorkgroupCount(params.lumaWorkgroupCount), + m_warpWorkgroupCount(params.warpWorkgroupCount), + m_lumaMap(std::move(params.lumaMap)), + m_warpMap(std::move(params.warpMap)), + m_genLumaPipeline(std::move(params.genLumaPipeline)), + m_genLumaDescriptorSet(std::move(params.genLumaDescriptorSet)), + m_genWarpPipeline(std::move(params.genWarpPipeline)), + m_genWarpDescriptorSet(std::move(params.genWarpDescriptorSet)) + {} + + ~EnvmapSampler() override {} + + private: + + SCachedCreationParameters m_cachedCreationParams; + + hlsl::uint32_t2 m_lumaWorkgroupCount; + hlsl::uint32_t2 m_warpWorkgroupCount; + + hlsl::float32_t m_avgLuma = 0.f; /* not set by the constructor; default keeps getAvgLuma() from returning an indeterminate value */ + + core::smart_refctd_ptr m_lumaMap; + core::smart_refctd_ptr m_warpMap; + + core::smart_refctd_ptr m_genLumaPipeline; + core::smart_refctd_ptr m_genLumaDescriptorSet; + + core::smart_refctd_ptr m_genWarpPipeline; + core::smart_refctd_ptr m_genWarpDescriptorSet; + +}; + +} +#endif diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h deleted file mode 100644 index 678adf59a9..0000000000 --- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ -#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ - -#include "nabla.h" -#include "nbl/video/IGPUShader.h" -#include "nbl/asset/ICPUShader.h" - -namespace nbl::ext::EnvmapImportanceSampling -{ - -class EnvmapImportanceSampling -{ - public: - EnvmapImportanceSampling(video::IVideoDriver* _driver) : m_driver(_driver) - {} - ~EnvmapImportanceSampling() = default; - - // Shader and Resources for Generating Luminance MipMaps from EnvMap - static constexpr uint32_t MaxMipCountLuminance = 13u; - static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; - static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; - - void initResources( - core::smart_refctd_ptr envmap, - uint32_t lumaGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, - uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); - void deinitResources(); - - // returns if RIS should be enabled based on variance calculations - inline bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor) - { - [[maybe_unused]] float dummy; - return computeWarpMap(envMapRegularizationFactor,pdfNormalizationFactor,dummy); - } - bool computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& maxEmittanceLuma); - - core::smart_refctd_ptr getLuminanceImageView() { return m_luminance; } - core::smart_refctd_ptr getWarpMapImageView() { return m_warpMap; } - - private: - #define uint uint32_t - struct uvec2 - { - uint x,y; - }; - struct vec2 - { - float x,y; - }; - struct vec3 - { - float x,y,z; - }; - #define vec4 core::vectorSIMDf - #define mat4 core::matrix4SIMD - #define mat4x3 core::matrix3x4SIMD - #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" - #undef uint - #undef vec4 - #undef mat4 - #undef mat4x3 - inline uint32_t calcMeasurementBufferSize() 
const - { - return sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t)*m_lumaWorkgroups[0]*m_lumaWorkgroups[1]; - } - #undef NBL_GLSL_EXT_ENVMAP_SAMPLING_LUMA_MEASUREMENTS - - uint32_t m_lumaWorkgroups[2]; - uint32_t m_warpWorkgroups[2]; - - core::smart_refctd_ptr m_luminance; - core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance - - core::smart_refctd_ptr m_lumaDS; - core::smart_refctd_ptr m_lumaMeasurePipeline; - core::smart_refctd_ptr m_lumaGenPipeline; - - // Shader and Resources for EnvironmentalMap Sample Warping - core::smart_refctd_ptr m_warpDS; - core::smart_refctd_ptr m_warpGPUShader; - core::smart_refctd_ptr m_warpPipeline; - - video::IVideoDriver* m_driver; -}; - -} - -#endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 18a25c8619..c359535468 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -127,7 +127,9 @@ unset(NABLA_HEADERS_PUBLIC2 ${NBL_TMP_FULL_PATHS}) set(NBL_CORE_SOURCES core/alloc/refctd_memory_resource.cpp core/hash/blake.cpp + core/sampling/EnvmapSampler.cpp ) + set(NBL_SYSTEM_SOURCES system/DefaultFuncPtrLoader.cpp system/IFileBase.cpp diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d228be8ea4..bc20cc52df 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -281,6 +281,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle. 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted_spheres.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quotient_and_pdf.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform_spheres.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/warps/spherical.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/accessors.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/common.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") # @@ -350,7 +357,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/workgroup2/shared_scan.hlsl") #Extensions LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ext/FullScreenTriangle/default.vert.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/text_rendering/msdf.hlsl") #memory LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/memory_accessor.hlsl") diff --git a/src/nbl/core/sampling/EnvmapSampler.cpp b/src/nbl/core/sampling/EnvmapSampler.cpp new file mode 100644 index 0000000000..8d4d968a17 --- /dev/null +++ b/src/nbl/core/sampling/EnvmapSampler.cpp @@ -0,0 +1,778 @@ +#include "nbl/core/sampling/EnvmapSampler.h" +#include "nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl" +#include "nlohmann/detail/input/parser.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using 
namespace asset; +using namespace hlsl; +using namespace nbl::hlsl::sampling::hierarchical_image; + +namespace nbl::core +{ + +class EnvmapSampler; + +namespace +{ + constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/core/builtin"; + + // Records the blit chain that fills mips 1..mipLevels-1 of `image`, each one from the mip directly above it. + // Precondition: mip 0 is in TRANSFER_SRC_OPTIMAL and every other mip is in TRANSFER_DST_OPTIMAL. + // Afterwards every mip except the last is in TRANSFER_SRC_OPTIMAL; the last mip stays in TRANSFER_DST_OPTIMAL. + void generateMipmap(video::IGPUCommandBuffer* cmdBuf, IGPUImage* image) + { + const auto mipLevels = image->getCreationParameters().mipLevels; + const auto extent = image->getCreationParameters().extent; + // extent of one axis at a given mip, clamped so that a non-square image never yields a 0-sized blit region + const auto mipDim = [](const uint32_t dim, const uint32_t level) -> uint32_t + { + const uint32_t shifted = dim >> level; + return shifted ? shifted : 1u; + }; + // `srcMip_i + 1 < mipLevels` rather than `srcMip_i < mipLevels-1` so the unsigned subtraction cannot wrap + for (uint32_t srcMip_i = 0; srcMip_i + 1 < mipLevels; srcMip_i++) + { + const uint32_t dstMip_i = srcMip_i + 1; + + const IGPUCommandBuffer::SImageBlit blit = { + .srcMinCoord = {0, 0, 0}, + .srcMaxCoord = {mipDim(extent.width, srcMip_i), mipDim(extent.height, srcMip_i), 1}, + .dstMinCoord = {0, 0, 0}, + .dstMaxCoord = {mipDim(extent.width, dstMip_i), mipDim(extent.height, dstMip_i), 1}, + .layerCount = 1, + .srcBaseLayer = 0, + .dstBaseLayer = 0, + .srcMipLevel = srcMip_i, + .dstMipLevel = dstMip_i, + .aspectMask = IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + }; + cmdBuf->blitImage(image, IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, image, IImage::LAYOUT::TRANSFER_DST_OPTIMAL, { &blit, 1 }, IGPUSampler::E_TEXTURE_FILTER::ETF_LINEAR); + + // last mip no need to transition + if (dstMip_i == mipLevels - 1) break; + + // the mip just written becomes the blit source of the next iteration + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barrier = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = image, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = dstMip_i, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = {&barrier, 1} });
+ + } + } + + // Creates a 2D image (storage | transfer src | transfer dst | sampled usage) backed by device-local memory + // and prepares a view over all of its mips and layers. `layers == 0` selects a plain (non-array) 2D view of one layer. + // Returns nullptr when the image cannot be created or its memory cannot be allocated. + core::smart_refctd_ptr createTexture(video::ILogicalDevice* device, const asset::VkExtent3D extent, E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 0u) + { + const auto real_layers = layers ? layers:1u; + + IGPUImage::SCreationParams imgParams; + imgParams.extent = extent; + imgParams.arrayLayers = real_layers; + imgParams.flags = static_cast(0); + imgParams.format = format; + imgParams.mipLevels = mipLevels; + imgParams.samples = IImage::ESCF_1_BIT; + imgParams.type = IImage::ET_2D; + imgParams.usage = IImage::EUF_STORAGE_BIT | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_TRANSFER_DST_BIT | IImage::EUF_SAMPLED_BIT; + // not const: the handle is moved into the view parameters below, a const handle would be copied instead + auto image = device->createImage(std::move(imgParams)); + if (!image) + return nullptr; + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + // an image without bound memory must not be handed to a view + if (!device->allocate(imageMemReqs, image.get()).isValid()) + return nullptr; + + IGPUImageView::SCreationParams viewparams; + viewparams.subUsages = IImage::EUF_STORAGE_BIT | IImage::EUF_SAMPLED_BIT; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = std::move(image); + viewparams.viewType = layers ?
IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return device->createImageView(std::move(viewparams)); + } + + core::smart_refctd_ptr getShaderSource( asset::IAssetManager* assetManager, const char* filePath, system::ILogger* logger) + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = logger; + lparams.workingDirectory = NBL_EXT_MOUNT_ENTRY; + auto bundle = assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + const auto assetType = bundle.getAssetType(); + logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + } +} + +core::smart_refctd_ptr EnvmapSampler::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + if (!params.validate()) + { + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto EnvmapExtent = params.envMap->getCreationParameters().image->getCreationParameters().extent; + // we don't need the 1x1 mip for anything + const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; + const auto EnvMapPoTExtent = [MipCountLuminance]() -> asset::VkExtent3D + { + const uint32_t width = 0x1u<>1u,1u }; + }(); + auto calcWorkgroupSize = [](const asset::VkExtent3D extent, const uint32_t workgroupDimension) -> uint32_t2 + { + return uint32_t2(extent.width - 1, extent.height - 1) / workgroupDimension + uint32_t2(1); + }; + + const auto device = params.utilities->getLogicalDevice(); + + ConstructorParams 
constructorParams; + + constructorParams.lumaWorkgroupCount = calcWorkgroupSize(EnvMapPoTExtent, params.genLumaMapWorkgroupDimension); + constructorParams.lumaMap = createLumaMap(device, EnvMapPoTExtent, MipCountLuminance); + + const auto upscale = 0; + const asset::VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genLumaPipelineLayout->getDescriptorSetLayouts()); + const auto genLumaDescriptorSet = genLumaDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genLumaPipelineLayout->getDescriptorSetLayouts()[0])); + + const auto genWarpPipelineLayout = createGenWarpPipelineLayout(device); + constructorParams.genWarpPipeline = createGenWarpPipeline(params, genWarpPipelineLayout.get()); + const auto genWarpDescriptorPool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, genWarpPipelineLayout->getDescriptorSetLayouts()); + const auto genWarpDescriptorSet = genWarpDescriptorPool->createDescriptorSet(core::smart_refctd_ptr(genWarpPipelineLayout->getDescriptorSetLayouts()[0])); + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo; + envMapDescriptorInfo.desc = params.envMap; + envMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapGeneralDescriptorInfo; + lumaMapGeneralDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapGeneralDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + IGPUDescriptorSet::SDescriptorInfo lumaMapReadDescriptorInfo; + lumaMapReadDescriptorInfo.desc = constructorParams.lumaMap; + lumaMapReadDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo; + warpMapDescriptorInfo.desc = constructorParams.warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = 
genLumaDescriptorSet.get(), .binding = 0, .count = 1, .info = &envMapDescriptorInfo + }, + { + .dstSet = genLumaDescriptorSet.get(), .binding = 1, .count = 1, .info = &lumaMapGeneralDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapReadDescriptorInfo + }, + { + .dstSet = genWarpDescriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, + }; + + device->updateDescriptorSets(writes, {}); + + constructorParams.genLumaDescriptorSet = genLumaDescriptorSet; + constructorParams.genWarpDescriptorSet = genWarpDescriptorSet; + + constructorParams.creationParams = std::move(params); + + return core::smart_refctd_ptr(new EnvmapSampler(std::move(constructorParams))); +} + +core::smart_refctd_ptr EnvmapSampler::createLumaMap(video::ILogicalDevice* device, asset::VkExtent3D extent, uint32_t mipCount, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32_SFLOAT, mipCount); +} + +core::smart_refctd_ptr EnvmapSampler::createWarpMap(video::ILogicalDevice* device, asset::VkExtent3D extent, const std::string_view debugName) +{ + return createTexture(device, extent, EF_R32G32_SFLOAT); +} + +smart_refctd_ptr EnvmapSampler::mount(core::smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + auto archive = make_smart_refctd_ptr(std::string_view("nbl/builtin/hlsl/sampling/hierarchical_image"), smart_refctd_ptr(logger), system); + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +core::smart_refctd_ptr EnvmapSampler::createGenLumaPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + 
mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_luma.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genLumaMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could 
not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipeline(const SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout) +{ + system::logger_opt_ptr logger = params.utilities->getLogger(); + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); + + const auto shaderSource = getShaderSource(params.assetManager.get(), "gen_warp.comp.hlsl", logger.get()); + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = shaderSource->getFilepathHint(); + options.preprocessorOptions.logger = logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const auto workgroupDimStr = std::to_string(params.genWarpMapWorkgroupDimension); + const IShaderCompiler::SMacroDefinition defines[] = { + { "WORKGROUP_DIM", workgroupDimStr.data() }, + }; + + options.preprocessorOptions.extraDefines = defines; + + const auto overridenUnspecialized = compiler->compileToSPIRV((const char*)shaderSource->getContent()->getPointer(), options); + const auto shader = device->compileShader({ overridenUnspecialized.get() }); + if (!shader) + { + logger.log("Could not 
compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + + video::IGPUComputePipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = pipelineLayout; + pipelineParams[0].shader = { .shader = shader.get(), .entryPoint = "main" }; + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createComputePipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + logger.log("Could not create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +core::smart_refctd_ptr < video::IGPUPipelineLayout> EnvmapSampler::createGenLumaPipelineLayout(video::ILogicalDevice* device) +{ + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(SLumaGenPushConstants) + }; + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({ &pcRange, 1 }, setLayout); + +} + +core::smart_refctd_ptr EnvmapSampler::createGenWarpPipelineLayout(video::ILogicalDevice* device) +{ + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + }, + { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u + } + }; + + const auto setLayout = device->createDescriptorSetLayout(bindings); + return device->createPipelineLayout({}, setLayout, nullptr, nullptr, nullptr); +} + +void EnvmapSampler::computeWarpMap(video::IQueue* queue) +{ + const auto logicalDevice = m_cachedCreationParams.utilities->getLogicalDevice(); + + core::smart_refctd_ptr cmdBuf; + { + // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope + auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command pool.", system::ILogger::ELL_ERROR); + return; + } + gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdBuf); + if (!cmdBuf) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to create command buffer.", system::ILogger::ELL_ERROR); + return; + } + } + + if (!cmdBuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return; + } + + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + const auto lumaMapMipLevels = lumaMapImage->getCreationParameters().mipLevels; + const auto lumaMapExtent = lumaMapImage->getCreationParameters().extent; + + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = 
ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + // Gen Luma Map + { + SLumaGenPushConstants pcData = {}; + pcData.lumaRGBCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; + pcData.lumaMapResolution = {lumaMapExtent.width, lumaMapExtent.height}; + + cmdBuf->bindComputePipeline(m_genLumaPipeline.get()); + cmdBuf->pushConstants(m_genLumaPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, + 0, sizeof(SLumaGenPushConstants), &pcData); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genLumaPipeline->getLayout(), + 0, 1, &m_genLumaDescriptorSet.get()); + cmdBuf->dispatch(m_lumaWorkgroupCount.x, m_lumaWorkgroupCount.y, 1); + } + + // Generate luminance mip map + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 1u, + 
.levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + generateMipmap(cmdBuf.get(), lumaMapImage); + } + + core::smart_refctd_ptr lumaTexelBuffer; + const auto lumaMapLastMip = lumaMapMipLevels - 1; + const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); + const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; + { + IGPUImage::SBufferCopy region = {}; + region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = lumaMapLastMip; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageExtent = { lumaMapLastMipExtent.x, lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; + + IGPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); + bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; + lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!lumaTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); + gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return; + } + auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = 
logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return; + } + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; + decltype(info)::image_barrier_t barrier = {}; + info.imgBarriers = { &barrier, &barrier + 1 }; + + { + barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; + barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; + barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT; + barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; + barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; + barrier.image = lumaMapImage; + barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; + barrier.subresourceRange.levelCount = 1u; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + cmdBuf->pipelineBarrier(EDF_NONE,info); + } + cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = lumaMapMipLevels - 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + 
.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = lumaMapMipLevels - 1, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }, + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + cmdBuf->bindComputePipeline(m_genWarpPipeline.get()); + cmdBuf->bindDescriptorSets(EPBP_COMPUTE, m_genWarpPipeline->getLayout(), + 0, 1, &m_genWarpDescriptorSet.get()); + cmdBuf->dispatch(m_warpWorkgroupCount.x, m_warpWorkgroupCount.y, 1); + } + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + } + }; + cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); + } + + if (!cmdBuf->end()) + { + if (auto* logger = logicalDevice->getLogger()) + 
logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); + return; + } + + { + auto signalSemaphore = logicalDevice->createSemaphore(0); + + IQueue::SSubmitInfo info; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufferInfo{ cmdBuf.get() }; + IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphoreInfo; + signalSemaphoreInfo.semaphore = signalSemaphore.get(); + signalSemaphoreInfo.value = 1; + signalSemaphoreInfo.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS; + info.commandBuffers = { &cmdBufferInfo, &cmdBufferInfo + 1 }; + info.signalSemaphores = { &signalSemaphoreInfo, &signalSemaphoreInfo + 1 }; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); + return; + } + + ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: waiting for copy completion.", system::ILogger::ELL_INFO); + if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("Compute Warpmap: failed to wait for copy completion.", system::ILogger::ELL_ERROR); + return; + } + + auto* allocation = lumaTexelBuffer->getBoundMemory().memory; + const IDeviceMemoryAllocation::MemoryRange range = { 0u, lumaTexelBuffer->getSize() }; + auto* ptr = reinterpret_cast(allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)); + + m_avgLuma = std::reduce(ptr, ptr + lumaMapLastTexelCount) / float32_t(lumaMapLastTexelCount); + } +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getWarpMapBarrier( + core::bitflag dstStageMask, + 
core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto warpMapImage = m_warpMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = warpMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = newLayout, + }; +} + +nbl::video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t EnvmapSampler::getLumaMapBarrier( + core::bitflag dstStageMask, + core::bitflag dstAccessMask, + nbl::video::IGPUImage::LAYOUT newLayout) +{ + const auto lumaMapImage = m_lumaMap->getCreationParameters().image.get(); + return { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask + } + }, + .image = lumaMapImage, + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + .newLayout = newLayout, + }; +} + + +} diff --git a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp deleted file mode 100644 index f11df5ce15..0000000000 --- a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h" - -#include - -using namespace nbl; -using namespace nbl::asset; -using namespace nbl::video; -using namespace ext::EnvmapImportanceSampling; - - -static core::smart_refctd_ptr createTexture(nbl::video::IVideoDriver* _driver, const VkExtent3D extent, E_FORMAT format, uint32_t mipLevels=1u, uint32_t layers=0u) -{ - const auto real_layers = layers ? layers:1u; - - IGPUImage::SCreationParams imgparams; - imgparams.extent = extent; - imgparams.arrayLayers = real_layers; - imgparams.flags = static_cast(0); - imgparams.format = format; - imgparams.mipLevels = mipLevels; - imgparams.samples = IImage::ESCF_1_BIT; - imgparams.type = IImage::ET_2D; - - IGPUImageView::SCreationParams viewparams; - viewparams.flags = static_cast(0); - viewparams.format = format; - viewparams.image = _driver->createDeviceLocalGPUImageOnDedMem(std::move(imgparams)); - viewparams.viewType = layers ? 
IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; - viewparams.subresourceRange.aspectMask = static_cast(0); - viewparams.subresourceRange.baseArrayLayer = 0u; - viewparams.subresourceRange.layerCount = real_layers; - viewparams.subresourceRange.baseMipLevel = 0u; - viewparams.subresourceRange.levelCount = mipLevels; - - return _driver->createGPUImageView(std::move(viewparams)); -} - -void EnvmapImportanceSampling::initResources(core::smart_refctd_ptr envmap, uint32_t lumaGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) -{ - const auto EnvmapExtent = envmap->getCreationParameters().image->getCreationParameters().extent; - // we don't need the 1x1 mip for anything - const uint32_t MipCountLuminance = IImage::calculateFullMipPyramidLevelCount(EnvmapExtent,IImage::ET_2D)-1; - const auto EnvMapPoTExtent = [MipCountLuminance]() -> VkExtent3D - { - const uint32_t width = 0x1u<>1u,1u }; - }(); - auto calcWorkgroups = [](uint32_t* workGroups, const VkExtent3D extent, const uint32_t workgroupDimension) - { - for (auto i=0; i<2; i++) - workGroups[i] = ((&extent.width)[i]-1u)/workgroupDimension+1u; - }; - - // TODO: Can we get away with R16_SFLOAT for the probabilities? 
- m_luminance = createTexture(m_driver,EnvMapPoTExtent,EF_R32_SFLOAT,MipCountLuminance); - calcWorkgroups(m_lumaWorkgroups,EnvMapPoTExtent,lumaGenWorkgroupDimension); - - // default make the warp-map same resolution as input envmap - // Format needs to be 32bit full precision float, because the Jacobian needs to accurately match PDF - const uint32_t upscale = 0; - const VkExtent3D WarpMapExtent = {EnvMapPoTExtent.width<&& pipelineLayout) -> core::smart_refctd_ptr - { - const char* sourceFmt = - R"===(#version 430 core - -#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u -#define WARP_MAP_GEN_WORKGROUP_DIM %u - -#include "%s" - -)==="; - - const size_t extraSize = 2u * 8u + 128u; - auto shader = core::make_smart_refctd_ptr(strlen(sourceFmt) + extraSize + 1u); - snprintf( - reinterpret_cast(shader->getPointer()), shader->getSize(), sourceFmt, - lumaGenWorkgroupDimension, - warpMapGenWorkgroupDimension, - shaderPath - ); - auto gpuShader = m_driver->createGPUShader(core::make_smart_refctd_ptr(std::move(shader), ICPUShader::buffer_contains_glsl)); - if (!gpuShader) - return nullptr; - - auto specializedShader = m_driver->createGPUSpecializedShader(gpuShader.get(), ISpecializedShader::SInfo{ nullptr,nullptr,"main",asset::ISpecializedShader::ESS_COMPUTE }); - if (!specializedShader) - return nullptr; - - return m_driver->createGPUComputePipeline(nullptr,std::move(pipelineLayout),std::move(specializedShader)); - }; - - // Create Everything - { - ISampler::SParams samplerParams; - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE; - samplerParams.MinFilter = ISampler::ETF_NEAREST; - samplerParams.MaxFilter = ISampler::ETF_LINEAR; - samplerParams.MipmapMode = ISampler::ESMM_NEAREST; - samplerParams.AnisotropicFilter = 0u; - samplerParams.CompareEnable = false; - - IGPUDescriptorSet::SDescriptorInfo lumaDescriptorInfo = {}; - lumaDescriptorInfo.desc = m_luminance; - lumaDescriptorInfo.image.sampler = nullptr; - - { - 
auto upscaleSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto lumaDescriptorCount = 3u; - IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1u; - bindings[0].samplers = &upscaleSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_BUFFER_DYNAMIC; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - bindings[2].binding = 2u; - bindings[2].type = asset::EDT_STORAGE_IMAGE; - bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[2].count = 1u; - - auto lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount); - { - SPushConstantRange range{ ISpecializedShader::ESS_COMPUTE,0u,sizeof(nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t) }; - auto lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(lumaDSLayout)); - m_lumaMeasurePipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/measure_luma.comp",core::smart_refctd_ptr(lumaPipelineLayout)); - m_lumaGenPipeline = genPipeline("nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma.comp",std::move(lumaPipelineLayout)); - } - m_lumaDS = m_driver->createGPUDescriptorSet(std::move(lumaDSLayout)); - - { - IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; - envMapDescriptorInfo.desc = envmap; - envMapDescriptorInfo.image.sampler = nullptr; - envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo lumaMeasurementInfo = {}; - lumaMeasurementInfo.desc = core::smart_refctd_ptr(m_driver->getDefaultDownStreamingBuffer()->getBuffer()); - lumaMeasurementInfo.buffer = {0,calcMeasurementBufferSize()}; - - IGPUDescriptorSet::SWriteDescriptorSet writes[lumaDescriptorCount]; - for (auto i=0u; 
iupdateDescriptorSets(lumaDescriptorCount,writes,0u,nullptr); - } - } - - { - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - samplerParams.MaxFilter = ISampler::ETF_NEAREST; - auto lumaSampler = m_driver->createGPUSampler(samplerParams); - - constexpr auto warpDescriptorCount = 2u; - IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1; - bindings[0].samplers = &lumaSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_IMAGE; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - auto warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount); - - m_warpPipeline = genPipeline( - "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp", - m_driver->createGPUPipelineLayout(nullptr,nullptr,core::smart_refctd_ptr(warpDSLayout)) - ); - - m_warpDS = m_driver->createGPUDescriptorSet(std::move(warpDSLayout)); - { - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; - warpMapDescriptorInfo.desc = m_warpMap; - warpMapDescriptorInfo.image.sampler = nullptr; - warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[warpDescriptorCount]; - for (auto i=0u; iupdateDescriptorSets(warpDescriptorCount,writes,0u,nullptr); - } - } - } -} - -void EnvmapImportanceSampling::deinitResources() -{ - m_lumaMeasurePipeline = nullptr; - m_lumaGenPipeline = nullptr; - m_lumaDS = nullptr; - - m_warpPipeline = nullptr; - m_warpDS = nullptr; - - m_warpMap = nullptr; - m_luminance = nullptr; -} - -bool EnvmapImportanceSampling::computeWarpMap(const float envMapRegularizationFactor, float& pdfNormalizationFactor, float& 
maxEmittanceLuma) -{ - bool enableRIS = false; - // - nbl_glsl_ext_EnvmapSampling_LumaGenShaderData_t pcData = {}; - pcData.luminanceScales.set(0.2126729f, 0.7151522f, 0.0721750f, 0.0f); - { - const auto imageExtent = m_luminance->getCreationParameters().image->getCreationParameters().extent; - pcData.lumaMapResolution = {imageExtent.width,imageExtent.height}; - } - - auto dynamicOffsets = core::make_refctd_dynamic_array>(1u); - auto lumaDispatch = [&](core::smart_refctd_ptr& pipeline,core::smart_refctd_dynamic_array* dynamicOffsets) - { - m_driver->bindComputePipeline(pipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,pipeline->getLayout(),0u,1u,&m_lumaDS.get(),dynamicOffsets); - m_driver->pushConstants(pipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(m_lumaWorkgroups[0],m_lumaWorkgroups[1],1); - }; - - // 3 seconds is a long time - constexpr uint64_t timeoutInNanoSeconds = 300000000000u; - - // Calculate directionality metric (0 uniform, 1 totally unidirectional) and new Regularization Factor. - // Ideally would want a better metric of how "concentrated" the energy is in one direction rather than variance, so it - // turns out that the first order spherical harmonic band and weighted (by luma) average of directions are the same thing. - float directionalityMetric = [&]() - { - maxEmittanceLuma = 0.f; - - const uint32_t size = calcMeasurementBufferSize(); - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! 
- auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - const auto& address = dynamicOffsets->operator[](0) = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,dynamicOffsets->data(),&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR); - return 0.f; - } - } - auto* data = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - - // measure into buffer - lumaDispatch(m_lumaMeasurePipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - { - // place and wait for download fence - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return 0.f; - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // reduce - core::vectorSIMDf avgDir; - { - const auto reduction = std::reduce( - data,data+size/sizeof(nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t), - nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t{0.f,0.f,0.f,0.f,0.f}, - [](nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t lhs, const nbl_glsl_ext_EnvmapSampling_LumaMeasurement_t& rhs){ - lhs.xDirSum += rhs.xDirSum; - lhs.yDirSum += rhs.yDirSum; - lhs.zDirSum += 
rhs.zDirSum; - lhs.weightSum += rhs.weightSum; - if (lhs.maxLumamulti_free(1u,&address,&size,nullptr); - - avgDir /= avgDir.wwww(); - avgDir.w = 0.f; - // should it be length or length squared? - const float directionality = core::length(avgDir)[0]; - std::cout << "Final Luminance Directionality = " << directionality << std::endl; - // the only reason why we'd get a NaN would be because there's literally 0 luminance in the image - return core::isnan(directionality) ? 0.f:directionality; - }(); - - const float regularizationFactor = core::min(envMapRegularizationFactor*directionalityMetric,envMapRegularizationFactor); - std::cout << "New Regularization Factor based on Directionality = " << regularizationFactor << std::endl; - - constexpr float regularizationThreshold = 0.00001f; - enableRIS = regularizationFactor>=regularizationThreshold; - - // Calc Luma again with new Regularization Factor - { - pcData.luminanceScales *= regularizationFactor; - pcData.luminanceScales.w = 1.f-regularizationFactor; - lumaDispatch(m_lumaGenPipeline,&dynamicOffsets); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - // Calc Mipmaps - m_luminance->regenerateMipMapLevels(); - - // Download last mip level and get avg from it - { - const auto lumaImage = m_luminance->getCreationParameters().image; - - // - IImage::SBufferCopy copyRegion = {}; - { - copyRegion.bufferRowLength = 0u; - copyRegion.bufferImageHeight = 0u; - //copyRegion.imageSubresource.aspectMask = wait for Vulkan; - copyRegion.imageSubresource.mipLevel = lumaImage->getCreationParameters().mipLevels-1u; - copyRegion.imageSubresource.baseArrayLayer = 0u; - copyRegion.imageSubresource.layerCount = lumaImage->getCreationParameters().arrayLayers; - copyRegion.imageOffset = { 0u,0u,0u }; - const auto extent = lumaImage->getMipSize(copyRegion.imageSubresource.mipLevel); - copyRegion.imageExtent = { extent.x,extent.y,extent.z }; - } - const uint32_t 
lastMipTexelCount = copyRegion.imageSubresource.layerCount*copyRegion.imageExtent.depth*copyRegion.imageExtent.height*copyRegion.imageExtent.width; - const uint32_t size = lastMipTexelCount*asset::getTexelOrBlockBytesize(lumaImage->getCreationParameters().format); - - // remember that without initializing the address to be allocated to invalid_address you won't get an allocation! - auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - uint32_t address = std::remove_pointer::type::invalid_address; - // allocate - { - // common page size - const uint32_t alignment = 4096u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint,1u,&address,&size,&alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the last luma mip map level from the GPU!", ELL_ERROR); - return core::nan(); - } - } - - // - copyRegion.bufferOffset = address; - m_driver->copyImageToBuffer(lumaImage.get(),downloadStagingArea->getBuffer(),1,©Region); - - // place and wait for download fence - { - auto downloadFence = m_driver->placeFence(true); - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - // - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED || result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the last luma mip map level from the GPU! 
Fence not Signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - return core::nan(); - } - // then invalidate the CPU cache of the mapping - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,size} }); - } - - // - { - const float* r32fData = reinterpret_cast(reinterpret_cast(downloadStagingArea->getBufferPointer())+address); - const auto avgVal = std::reduce(r32fData,r32fData+lastMipTexelCount)/float(lastMipTexelCount); - pdfNormalizationFactor = 1.0/(2.0*core::PI()*core::PI()*avgVal); - } - downloadStagingArea->multi_free(1u,&address,&size,nullptr); - } - - // Generate WarpMap - { - m_driver->bindComputePipeline(m_warpPipeline.get()); - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); - m_driver->dispatch(m_warpWorkgroups[0],m_warpWorkgroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: rethink when reimplementing in Vulkan - } - - return enableRIS; -} - -