Devsh-Graphics-Programming · kevyuu · Nov 11, 2025 · Nov 11, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers
diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools
diff --git a/3rdparty/openexr b/3rdparty/openexr
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -181,6 +181,7 @@ option(NBL_BUILD_EXAMPLES "Enable building examples" ON)
 option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" ON)
 option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON)
 option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON)
+option(NBL_BUILD_ENVMAP_IMPORTANCE_SAMPLING "Enable Nabla Envmap Importance Sampling extension?" ON)
 
 option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF)
 if(NBL_COMPILE_WITH_CUDA)

diff --git a/examples_tests b/examples_tests
diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
@@ -69,7 +69,7 @@ NBL_CONCEPT_END(
 #include <nbl/builtin/hlsl/concepts/__end.hlsl>
 
 template<typename T, typename V, typename I=uint32_t>
-NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
+NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
 
 }
 }

diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
@@ -0,0 +1,193 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+
+#include <nbl/builtin/hlsl/sampling/basic.hlsl>
+#include <nbl/builtin/hlsl/sampling/warp.hlsl>
+#include <nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// Produces warp-map entries by descending the luminance mip chain from its coarsest level to mip 0.
+// At every level one of the child texels is selected through `choseSecond`, driven by the luminance
+// values returned by `LuminanceAccessorT::texelFetch` / `texelGather`.
+template <typename ScalarT, typename LuminanceAccessorT 
+  NBL_PRIMARY_REQUIRES(
+		is_scalar_v<ScalarT> && 
+		hierarchical_image::LuminanceReadAccessor<LuminanceAccessorT, ScalarT>
+	)
+struct LuminanceMapSampler
+{
+	using scalar_type = ScalarT;
+	using vector2_type = vector<scalar_type, 2>;
+	using vector4_type = vector<scalar_type, 4>;
+
+	LuminanceAccessorT _map;
+	// extent of mip 0 of the luminance map, in texels
+	uint32_t2 _mapSize;
+	// `warpSize - 1`; divides the warp pixel coordinate so the first and last pixel map to xi = 0 and xi = 1
+	uint32_t2 _lastWarpPixel;
+	// true when the top of the mip chain is a 2x1 level, which needs one extra horizontal split
+	bool _aspect2x1;
+
+	// Factory.
+	// lumaMap   - accessor copied into the sampler
+	// mapSize   - extent of mip 0 of the luminance map
+	// aspect2x1 - whether the map is twice as wide as it is tall
+	// warpSize  - extent of the warp map being generated
+	// NOTE(review): a warpSize of 1 along an axis makes `_lastWarpPixel` zero and `binarySearch` divide by zero - confirm callers never pass it.
+	static LuminanceMapSampler<ScalarT, LuminanceAccessorT> create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1, uint32_t2 warpSize)
+	{
+		LuminanceMapSampler<ScalarT, LuminanceAccessorT> result;
+		result._map = lumaMap;
+		result._mapSize = mapSize;
+		result._lastWarpPixel = warpSize - uint32_t2(1, 1);
+		result._aspect2x1 = aspect2x1;
+		return result;
+	}
+
+	// Decides between two neighbouring weights using the random variable `xi`.
+	// The probability of the first weight is computed as 1/(1 + second/first), which equals first/(first + second).
+	// `xi` is passed by reference to `PartitionRandVariable` - presumably it gets rescaled so it can be reused on the next level; verify against sampling/basic.hlsl.
+	static bool choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi)
+	{
+		// numerical resilience against IEEE754: with `first == 0` and `second > 0` the ratio is +inf and `leftProb` becomes exactly 0
+		scalar_type dummy = scalar_type(0);
+		PartitionRandVariable<scalar_type> partition;
+		partition.leftProb = scalar_type(1) / (scalar_type(1) + (second / first));
+		return partition(xi, dummy);
+	}
+
+	// Maps the warp-map pixel `coord` to a UV in the luminance map.
+	// Returns (selected mip-0 texel + remaining xi) / _mapSize.
+	vector2_type binarySearch(const uint32_t2 coord) NBL_CONST_MEMBER_FUNC
+	{
+		// We use _lastWarpPixel here for corner sampling
+		// `xi` must be made of `scalar_type` because its components are handed to `choseSecond` by reference
+		vector2_type xi = vector2_type(coord) / vector2_type(_lastWarpPixel);
+		uint32_t2 p = uint32_t2(0, 0);
+		// index of the coarsest level that gets sampled; a single mip level, hence a scalar
+		const uint32_t mip2x1 = findMSB(_mapSize.y);
+
+		if (_aspect2x1) {
+			// do one split in the X axis first cause penultimate full mip would have been 2x1
+			p.x = choseSecond(_map.texelFetch(uint32_t2(0, 0), mip2x1), _map.texelFetch(uint32_t2(1, 0), mip2x1), xi.x) ? 1 : 0;
+		}
+
+		for (int32_t i = int32_t(mip2x1) - 1; i >= 0; i--)
+		{
+			// move to the top-left child of the texel chosen on the coarser level
+			p <<= 1;
+			// the indexing below assumes gather order [0]=(0,1), [1]=(1,1), [2]=(1,0), [3]=(0,0), as produced by the accessor in gen_warp.comp.hlsl
+			const vector4_type values = _map.texelGather(p, uint32_t(i));
+			scalar_type wx_0, wx_1;
+			{
+				// row sums decide the vertical split first
+				const scalar_type wy_0 = values[3] + values[2];
+				const scalar_type wy_1 = values[1] + values[0];
+				if (choseSecond(wy_0, wy_1, xi.y))
+				{
+					p.y |= 1;
+					wx_0 = values[0];
+					wx_1 = values[1];
+				}
+				else
+				{
+					wx_0 = values[3];
+					wx_1 = values[2];
+				}
+			}
+			// then split horizontally inside the chosen row
+			if (choseSecond(wx_0, wx_1, xi.x))
+				p.x |= 1;
+		}
+
+		// If we don't add xi, the sample will clump to the lowest corner of environment map texel. We add xi to simulate uniform distribution within a pixel and make the sample continuous. This is why we compute the pdf not from the normalized luminance of the texel, instead from the reciprocal of the Jacobian.
+		const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / vector2_type(_mapSize);
+		return directionUV;
+	}
+
+	// Returns the UVs of the 2x2 warp-pixel footprint whose lowest corner is `sampleCoord`.
+	// Row order matches the gather order used above: (0,1), (1,1), (1,0), (0,0).
+	matrix<scalar_type, 4, 2> sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC
+	{
+		// offsets stay in integer space, `binarySearch` takes a `uint32_t2`
+		const vector2_type dir0 = binarySearch(sampleCoord + uint32_t2(0, 1));
+		const vector2_type dir1 = binarySearch(sampleCoord + uint32_t2(1, 1));
+		const vector2_type dir2 = binarySearch(sampleCoord + uint32_t2(1, 0));
+		const vector2_type dir3 = binarySearch(sampleCoord);
+		return matrix<scalar_type, 4, 2>(
+			dir0,
+			dir1,
+			dir2,
+			dir3
+		);
+	}
+};
+
+// Importance sampler built from a luminance map (`LuminanceAccessorT`), a warp map
+// (`HierarchicalSamplerT`, queried through `sampleUvs`) and a UV <-> direction mapping (`PostWarpT`).
+template <typename ScalarT, typename LuminanceAccessorT, typename HierarchicalSamplerT, typename PostWarpT 
+  NBL_PRIMARY_REQUIRES(is_scalar_v<ScalarT> &&
+		concepts::accessors::GenericReadAccessor<LuminanceAccessorT, ScalarT, float32_t2> &&
+		hierarchical_image::HierarchicalSampler<HierarchicalSamplerT, ScalarT> &&
+		concepts::Warp<PostWarpT>)
+struct HierarchicalImage 
+{
+	using scalar_type = ScalarT;
+	using vector2_type = vector<ScalarT, 2>;
+	using vector3_type = vector<ScalarT, 3>;
+	using vector4_type = vector<ScalarT, 4>;
+	LuminanceAccessorT _lumaMap;
+	HierarchicalSamplerT _warpMap;
+	// stored by `create`; not read by any method of this struct
+	uint32_t2 _warpSize;
+	// `_warpSize - 1`, scales xi in [0,1] to warp-map texel coordinates
+	uint32_t2 _lastWarpPixel;
+	// reciprocal of the average luminance, normalizes luminance reads
+	scalar_type _rcpAvgLuma;
+
+	// Factory.
+	// lumaMap  - luminance accessor, read with a UV through `get(uv, out value)`
+	// warpMap  - warp-map sampler providing `sampleUvs`
+	// warpSize - extent of the warp map in texels
+	// avgLuma  - average luminance; only its reciprocal is kept
+	static HierarchicalImage create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, NBL_CONST_REF_ARG(HierarchicalSamplerT) warpMap, uint32_t2 warpSize, scalar_type avgLuma) 
+	{
+		HierarchicalImage<ScalarT, LuminanceAccessorT, HierarchicalSamplerT, PostWarpT> result;
+		result._lumaMap = lumaMap;
+		result._warpMap = warpMap;
+		result._warpSize = warpSize;
+		result._lastWarpPixel = warpSize - uint32_t2(1, 1);
+		result._rcpAvgLuma = ScalarT(1.0) / avgLuma;
+		return result;
+	}
+
+	// Converts `direction` to its UV via `PostWarpT::inverseWarp` and writes
+	// (luma * _rcpAvgLuma) * PostWarpT::backwardDensity(direction) to `pdf`.
+	// Returns the UV.
+	vector2_type inverseWarp_and_deferredPdf(NBL_REF_ARG(scalar_type) pdf, vector3_type direction) NBL_CONST_MEMBER_FUNC
+	{
+		const vector2_type envmapUv = PostWarpT::inverseWarp(direction);
+		scalar_type luma;
+		_lumaMap.get(envmapUv, luma);
+		pdf = (luma * _rcpAvgLuma) * PostWarpT::backwardDensity(direction);
+		return envmapUv;
+	}
+
+	// Same pdf as `inverseWarp_and_deferredPdf`, for callers that do not need the UV.
+	scalar_type deferredPdf(vector3_type direction) NBL_CONST_MEMBER_FUNC
+	{
+		scalar_type pdf;
+		inverseWarp_and_deferredPdf(pdf, direction);
+		return pdf;
+	}
+
+	// Generates a direction from the random pair `xi`.
+	// pdf - receives |warp density / Jacobian determinant of the interpolated warp map|
+	// uv  - receives the interpolated UV that was handed to `PostWarpT::warp`
+	// Returns the warped direction.
+	// NOTE(review): with xi == 1 the footprint origin equals `_lastWarpPixel`, so `sampleUvs` is asked for texels one past the last warp pixel - presumably xi is in [0,1); confirm with callers.
+	vector3_type generate_and_pdf(NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(vector2_type) uv, vector2_type xi) NBL_CONST_MEMBER_FUNC
+	{
+		// stay in `scalar_type` precision instead of forcing float32
+		const vector2_type texelCoord = xi * vector2_type(_lastWarpPixel);
+
+		// rows are the UVs at offsets (0,1), (1,1), (1,0), (0,0) from the footprint origin
+		matrix<scalar_type, 4, 2> uvs = _warpMap.sampleUvs(uint32_t2(texelCoord));
+
+		const vector2_type interpolant = frac(texelCoord);
+
+		// bilinear interpolation, written out so the partial derivatives can be reused for the Jacobian
+		const vector2_type xDiffs[] = {
+			uvs[2] - uvs[3],
+			uvs[1] - uvs[0]
+		};
+		const vector2_type yVals[] = {
+			xDiffs[0] * interpolant.x + uvs[3],
+			xDiffs[1] * interpolant.x + uvs[0]
+		};
+		const vector2_type yDiff = yVals[1] - yVals[0];
+		uv = yDiff * interpolant.y + yVals[0];
+
+		const WarpResult<vector3_type> warpResult = PostWarpT::warp(uv);
+
+		// The two derivative vectors are passed as constructor arguments; whether they end up as rows or columns does not matter, the determinant of a matrix equals that of its transpose.
+		// The factor `_lastWarpPixel.x * _lastWarpPixel.y` accounts for the xi -> texelCoord scaling.
+		const scalar_type detInterpolJacobian = determinant(matrix<scalar_type, 2, 2>(
+			lerp(xDiffs[0], xDiffs[1], interpolant.y), // dF/dx
+			yDiff // dF/dy
+		)) * _lastWarpPixel.x * _lastWarpPixel.y;
+
+		pdf = abs(warpResult.density / detInterpolJacobian);
+
+		return warpResult.dst;
+	}
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl
@@ -0,0 +1,62 @@
+// include guard: the macro tested and the macro defined must be the same, otherwise a second inclusion redefines everything
+#ifndef _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace hierarchical_image
+{
+// Concept `LuminanceReadAccessor<U, ScalarT>`: `U` must provide
+//   texelFetch(uint32_t2 coord, uint32_t level)  returning ScalarT
+//   texelGather(uint32_t2 coord, uint32_t level) returning vector<ScalarT, 4>
+// declare concept
+#define NBL_CONCEPT_NAME LuminanceReadAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(ScalarT)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (a,U)
+#define NBL_CONCEPT_PARAM_1 (coord,uint32_t2)
+#define NBL_CONCEPT_PARAM_2 (level,uint32_t)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+// need to be defined AFTER the concept begins
+#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+// the members are called without the `template` disambiguator: no explicit template arguments follow the name, and the accessors are plain member functions
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.texelFetch(coord,level)) , ::nbl::hlsl::is_same_v, ScalarT))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.texelGather(coord,level)) , ::nbl::hlsl::is_same_v, vector<ScalarT, 4>))
+);
+#undef level
+#undef coord
+#undef a
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+// Concept `HierarchicalSampler<HierarchicalSamplerT, ScalarT>`: `HierarchicalSamplerT` must provide
+//   sampleUvs(vector<uint32_t, 2> coord) returning matrix<ScalarT, 4, 2>
+// `sampleUvs` returns 4 UVs forming a square, which are used to calculate the Jacobian matrix
+// declare concept
+#define NBL_CONCEPT_NAME HierarchicalSampler
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (HierarchicalSamplerT)(ScalarT)
+// not the greatest syntax but works
+#define NBL_CONCEPT_PARAM_0 (sampler,HierarchicalSamplerT)
+#define NBL_CONCEPT_PARAM_1 (coord,vector<uint32_t, 2>)
+// start concept
+NBL_CONCEPT_BEGIN(2)
+// need to be defined AFTER the concept begins
+#define sampler NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+// called without the `template` disambiguator: no explicit template arguments follow the member name
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((sampler.sampleUvs(coord)) , ::nbl::hlsl::is_same_v, matrix<ScalarT, 4, 2>))
+);
+#undef sampler
+#undef coord
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/common.hlsl
@@ -0,0 +1,26 @@
+#ifndef _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+#define _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+namespace hierarchical_image
+{
+
+// Push constants read by the luminance generation shader (gen_luma.comp.hlsl).
+struct SLumaGenPushConstants
+{
+  // per-channel weights; the shader takes their dot product with the environment map sample
+  float32_t3 lumaRGBCoefficients;
+  // extent in texels; the shader uses it to reject out-of-range invocations and to normalize the row index
+  uint32_t2 lumaMapResolution;
+};
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_luma.comp.hlsl
@@ -0,0 +1,25 @@
+#include "common.hlsl"
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling::hierarchical_image;
+
+// weights and resolution for the luminance pass
+[[vk::push_constant]] SLumaGenPushConstants pc;
+
+// source environment map, read with integer texel coordinates at mip 0
+[[vk::binding(0, 0)]] Texture2D<float32_t4> envMap;
+// destination luminance image, one scalar per texel
+[[vk::binding(1, 0)]] RWTexture2D<float32_t> outImage;
+
+// One invocation per texel: writes dot(envMap texel, lumaRGBCoefficients) * sin(pi * v) to `outImage`,
+// where v is the normalized row centre. Invocations outside `pc.lumaMapResolution` do nothing.
+// NOTE(review): the sin(pi * v) weight looks like the solid-angle factor of a latitude-longitude map - confirm the expected envmap parameterization.
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+	const uint32_t2 texel = threadID.xy;
+	// compare only the XY components, the resolution is two-dimensional
+	if (all(texel < pc.lumaMapResolution))
+	{
+		const float32_t uv_y = (float32_t(texel.y) + 0.5f) / float32_t(pc.lumaMapResolution.y);
+		// `Load` takes an integer location (x, y, mip); only the colour channels are needed
+		const float32_t3 envMapSample = envMap.Load(int32_t3(texel, 0)).rgb;
+		const float32_t luma = hlsl::dot(envMapSample, pc.lumaRGBCoefficients) * sin(numbers::pi<float32_t> * uv_y);
+
+		outImage[texel] = luma;
+	}
+}
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image/gen_warp.comp.hlsl
@@ -0,0 +1,48 @@
+#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl"
+
+// luminance map, read per mip level through `LuminanceAccessor`
+[[vk::binding(0, 0)]] Texture2D<float32_t> lumaMap;
+
+// destination warp map, receives one UV per pixel
+[[vk::binding(1, 0)]] RWTexture2D<float32_t2> outImage;
+
+using namespace nbl;
+using namespace nbl::hlsl;
+using namespace nbl::hlsl::sampling;
+
+// Accessor over the bound `lumaMap` texture, used as the luminance source of `LuminanceMapSampler`.
+struct LuminanceAccessor
+{
+    // Reads the texel at `coord` of mip `level`.
+    float32_t texelFetch(uint32_t2 coord, uint32_t level)
+    {
+        const uint32_t3 location = uint32_t3(coord, level);
+        return lumaMap.Load(location);
+    }
+
+    // Reads the 2x2 footprint whose lowest corner is `coord` on mip `level`.
+    // Component order: x = offset (0,1), y = (1,1), z = (1,0), w = (0,0).
+    float32_t4 texelGather(uint32_t2 coord, uint32_t level)
+    {
+        const uint32_t3 location = uint32_t3(coord, level);
+        const float32_t texel01 = lumaMap.Load(location, uint32_t2(0, 1));
+        const float32_t texel11 = lumaMap.Load(location, uint32_t2(1, 1));
+        const float32_t texel10 = lumaMap.Load(location, uint32_t2(1, 0));
+        const float32_t texel00 = lumaMap.Load(location, uint32_t2(0, 0));
+        return float32_t4(texel01, texel11, texel10, texel00);
+    }
+};
+
+// One invocation per warp-map pixel: runs the hierarchical search over `lumaMap` and stores the resulting UV in `outImage`.
+[numthreads(WORKGROUP_DIM, WORKGROUP_DIM, 1)]
+[shader("compute")]
+void main(uint32_t3 threadID : SV_DispatchThreadID)
+{
+    uint32_t warpMapWidth, warpMapHeight;
+    outImage.GetDimensions(warpMapWidth, warpMapHeight);
+    const uint32_t2 warpMapSize = uint32_t2(warpMapWidth, warpMapHeight);
+
+    const uint32_t2 pixelCoord = threadID.xy;
+    // skip invocations of a partially filled workgroup, same guard as the luminance pass
+    if (all(pixelCoord < warpMapSize))
+    {
+        uint32_t lumaMapWidth, lumaMapHeight;
+        lumaMap.GetDimensions(lumaMapWidth, lumaMapHeight);
+
+        LuminanceAccessor luminanceAccessor;
+
+        using LuminanceSampler = LuminanceMapSampler<float32_t, LuminanceAccessor>;
+
+        // `binarySearch` divides the pixel coordinate by (warpSize - 1), so warpSize has to be the extent of the image being written;
+        // this equals the luminance map extent whenever both images share a resolution
+        LuminanceSampler luminanceSampler = 
+          LuminanceSampler::create(luminanceAccessor, uint32_t2(lumaMapWidth, lumaMapHeight), lumaMapWidth != lumaMapHeight, warpMapSize);
+
+        outImage[pixelCoord] = luminanceSampler.binarySearch(pixelCoord);
+    }
+}
+8 −2		.github/workflows/ci.yml
+59 −40		CMakeLists.txt
+1 −1		include/vk_video/vulkan_video_codec_av1std.h
+235 −447		include/vulkan/vulkan.cppm
+1,022 −1,490		include/vulkan/vulkan.hpp
+1,995 −2,265		include/vulkan/vulkan_core.h
+1,309 −1,462		include/vulkan/vulkan_enums.hpp
+90 −354		include/vulkan/vulkan_extension_inspection.hpp
+3,748 −4,246		include/vulkan/vulkan_funcs.hpp
+2,022 −2,198		include/vulkan/vulkan_handles.hpp
+64 −450		include/vulkan/vulkan_hash.hpp
+13 −15		include/vulkan/vulkan_hpp_macros.hpp
+0 −131		include/vulkan/vulkan_ohos.h
+1,485 −1,921		include/vulkan/vulkan_raii.hpp
+64 −60		include/vulkan/vulkan_shared.hpp
+992 −1,177		include/vulkan/vulkan_static_assertions.hpp
+225 −3,110		include/vulkan/vulkan_structs.hpp
+1,338 −1,527		include/vulkan/vulkan_to_string.hpp
+5 −0		include/vulkan/vulkan_video.cppm
+0 −2		registry/apiconventions.py
+9 −15		registry/base_generator.py
+4 −9		registry/generator.py
+0 −111		registry/reg.py
+1 −2		registry/spec_tools/conventions.py
+280 −2,379		registry/validusage.json
+843 −1,284		registry/vk.xml
+0 −16		registry/vkconventions.py
+4 −11		registry/vulkan_object.py
+4 −0		tests/CMakeLists.txt
+40 −0		31_HLSLPathTracer/CMakeLists.txt
+837 −0		31_HLSLPathTracer/app_resources/glsl/common.glsl
+182 −0		31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp
+60 −0		31_HLSLPathTracer/app_resources/glsl/litBySphere.comp
+105 −0		31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp
+36 −0		31_HLSLPathTracer/app_resources/hlsl/accumulator.hlsl
+50 −0		31_HLSLPathTracer/app_resources/hlsl/common.hlsl
+204 −0		31_HLSLPathTracer/app_resources/hlsl/concepts.hlsl
+409 −0		31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl
+74 −0		31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl
+242 −0		31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl
+458 −0		31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl
+268 −0		31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl
+19 −0		31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl
+51 −0		31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl
+75 −0		31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl
+335 −0		31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl
+31 −0		31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl
+17 −0		31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl
+66 −0		31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl
+15 −0		31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl
+7 −0		31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl
+252 −0		31_HLSLPathTracer/app_resources/hlsl/scene.hlsl
+28 −0		31_HLSLPathTracer/config.json.template
+17 −0		31_HLSLPathTracer/include/nbl/this_example/common.hpp
+167 −0		31_HLSLPathTracer/include/nbl/this_example/transform.hpp
+1,748 −0		31_HLSLPathTracer/main.cpp
+50 −0		31_HLSLPathTracer/pipeline.groovy
+17 −1		40_PathTracer/src/renderer/CRenderer.cpp
+72 −0		74_EnvmapImportanceSampling/CMakeLists.txt
+33 −0		74_EnvmapImportanceSampling/app_resources/common.hlsl
+24 −0		74_EnvmapImportanceSampling/app_resources/present.frag.hlsl
+103 −0		74_EnvmapImportanceSampling/app_resources/test.comp.hlsl
+28 −0		74_EnvmapImportanceSampling/config.json.template
+7 −0		74_EnvmapImportanceSampling/imagesTestList.txt
+440 −0		74_EnvmapImportanceSampling/main.cpp
+4 −2		CMakeLists.txt